MSG_DEALLOC_VGPRS slows down very small, wave-slot-limited kernels. It has been found that this message is only really needed for VGPR-limited kernels. A kernel is VGPR limited when the number of waves its VGPR usage allows (total number of VGPRs per SIMD / number of used VGPRs) is less than the number of wave slots.
1415 lines
59 KiB
LLVM
1415 lines
59 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 | FileCheck %s -check-prefixes=GFX10
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s -check-prefix=GFX11
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12
|
|
|
|
;; Check that consecutive store operations are grouped greedily into
|
|
;; hard clauses of the appropriate length for each target.
|
|
;; This test uses <4 x i32> stores in order to prevent the stores from
|
|
;; being combined into larger operations due to their adjacency.
|
|
define amdgpu_kernel void @long_store_chain(ptr addrspace(1) %p) {
; GFX10-LABEL: long_store_chain:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_mov_b32 s1, s0
; GFX10-NEXT:    s_mov_b32 s2, s0
; GFX10-NEXT:    s_mov_b32 s3, s0
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_mov_b32_e32 v2, s2
; GFX10-NEXT:    v_mov_b32_e32 v3, s3
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:16
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:32
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:48
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:64
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:80
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:96
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:112
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:128
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:144
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:160
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:176
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:192
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:208
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:224
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:240
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:256
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:272
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:288
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:304
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:320
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:336
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:352
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:368
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:384
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:400
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:416
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:432
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:448
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:464
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:480
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:496
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:512
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:528
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:544
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:560
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:576
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:592
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:608
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:624
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:640
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:656
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:672
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:688
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:704
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:720
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:736
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:752
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:768
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:784
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:800
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:816
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:832
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:848
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:864
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:880
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:896
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:912
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:928
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:944
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:960
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:976
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:992
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:1008
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:1024
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:1040
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: long_store_chain:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    s_mov_b32 s2, s0
; GFX11-NEXT:    s_mov_b32 s3, s0
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_mov_b32_e32 v2, s2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5]
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:16
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:32
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:48
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:64
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:80
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:96
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:112
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:128
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:144
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:160
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:176
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:192
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:208
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:224
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:240
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:256
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:272
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:288
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:304
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:320
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:336
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:352
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:368
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:384
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:400
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:416
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:432
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:448
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:464
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:480
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:496
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:512
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:528
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:544
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:560
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:576
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:592
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:608
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:624
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:640
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:656
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:672
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:688
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:704
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:720
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:736
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:752
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:768
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:784
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:800
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:816
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:832
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:848
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:864
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:880
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:896
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:912
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:928
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:944
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:960
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:976
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:992
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1008
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1024
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1040
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: long_store_chain:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[4:5], s[2:3], 0x24
; GFX12-NEXT:    s_mov_b32 s0, 0
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-NEXT:    s_mov_b32 s1, s0
; GFX12-NEXT:    s_mov_b32 s2, s0
; GFX12-NEXT:    s_mov_b32 s3, s0
; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT:    v_mov_b32_e32 v2, s2
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_clause 0x1f
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5]
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:16
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:32
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:48
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:64
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:80
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:96
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:112
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:128
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:144
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:160
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:176
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:192
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:208
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:224
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:240
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:256
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:272
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:288
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:304
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:320
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:336
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:352
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:368
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:384
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:400
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:416
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:432
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:448
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:464
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:480
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:496
; GFX12-NEXT:    s_clause 0x1f
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:512
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:528
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:544
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:560
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:576
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:592
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:608
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:624
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:640
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:656
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:672
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:688
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:704
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:720
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:736
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:752
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:768
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:784
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:800
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:816
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:832
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:848
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:864
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:880
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:896
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:912
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:928
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:944
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:960
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:976
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:992
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1008
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1024
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1040
; GFX12-NEXT:    s_endpgm
  ; 66 back-to-back <4 x i32> zero stores to %p[0] .. %p[65] (16 bytes apart).
  ; The checks above expect gfx11 and gfx12 to wrap them in s_clause groups of
  ; 32, 32 and 2 stores, while gfx10 emits them without any s_clause.
  store <4 x i32> zeroinitializer, ptr addrspace(1) %p
  %ptr1 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 1
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr1
  %ptr2 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 2
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr2
  %ptr3 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 3
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr3
  %ptr4 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 4
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr4
  %ptr5 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 5
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr5
  %ptr6 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 6
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr6
  %ptr7 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 7
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr7
  %ptr8 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 8
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr8
  %ptr9 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 9
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr9
  %ptr10 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 10
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr10
  %ptr11 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 11
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr11
  %ptr12 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 12
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr12
  %ptr13 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 13
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr13
  %ptr14 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 14
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr14
  %ptr15 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 15
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr15
  %ptr16 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 16
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr16
  %ptr17 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 17
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr17
  %ptr18 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 18
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr18
  %ptr19 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 19
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr19
  %ptr20 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 20
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr20
  %ptr21 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 21
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr21
  %ptr22 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 22
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr22
  %ptr23 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 23
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr23
  %ptr24 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 24
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr24
  %ptr25 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 25
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr25
  %ptr26 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 26
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr26
  %ptr27 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 27
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr27
  %ptr28 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 28
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr28
  %ptr29 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 29
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr29
  %ptr30 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 30
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr30
  %ptr31 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 31
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr31
  %ptr32 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 32
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr32
  %ptr33 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 33
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr33
  %ptr34 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 34
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr34
  %ptr35 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 35
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr35
  %ptr36 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 36
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr36
  %ptr37 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 37
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr37
  %ptr38 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 38
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr38
  %ptr39 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 39
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr39
  %ptr40 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 40
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr40
  %ptr41 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 41
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr41
  %ptr42 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 42
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr42
  %ptr43 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 43
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr43
  %ptr44 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 44
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr44
  %ptr45 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 45
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr45
  %ptr46 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 46
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr46
  %ptr47 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 47
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr47
  %ptr48 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 48
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr48
  %ptr49 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 49
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr49
  %ptr50 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 50
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr50
  %ptr51 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 51
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr51
  %ptr52 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 52
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr52
  %ptr53 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 53
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr53
  %ptr54 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 54
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr54
  %ptr55 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 55
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr55
  %ptr56 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 56
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr56
  %ptr57 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 57
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr57
  %ptr58 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 58
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr58
  %ptr59 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 59
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr59
  %ptr60 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 60
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr60
  %ptr61 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 61
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr61
  %ptr62 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 62
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr62
  %ptr63 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 63
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr63
  %ptr64 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 64
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr64
  %ptr65 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 65
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr65
  ret void
}
|
|
|
|
;; Long chain of loads since gfx10 doesn't cluster stores.
|
|
;; Use i32 loads to save on register pressure
|
|
define amdgpu_kernel void @long_load_chain(ptr addrspace(1) %p) {
|
|
; GFX10-LABEL: long_load_chain:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_clause 0x3e
|
|
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
|
|
; GFX10-NEXT: s_load_dword s3, s[0:1], 0x10
|
|
; GFX10-NEXT: s_load_dword s4, s[0:1], 0x20
|
|
; GFX10-NEXT: s_load_dword s5, s[0:1], 0x30
|
|
; GFX10-NEXT: s_load_dword s6, s[0:1], 0x40
|
|
; GFX10-NEXT: s_load_dword s7, s[0:1], 0x50
|
|
; GFX10-NEXT: s_load_dword s8, s[0:1], 0x60
|
|
; GFX10-NEXT: s_load_dword s9, s[0:1], 0x70
|
|
; GFX10-NEXT: s_load_dword s10, s[0:1], 0x80
|
|
; GFX10-NEXT: s_load_dword s11, s[0:1], 0x90
|
|
; GFX10-NEXT: s_load_dword s12, s[0:1], 0xa0
|
|
; GFX10-NEXT: s_load_dword s13, s[0:1], 0xb0
|
|
; GFX10-NEXT: s_load_dword s14, s[0:1], 0xc0
|
|
; GFX10-NEXT: s_load_dword s15, s[0:1], 0xd0
|
|
; GFX10-NEXT: s_load_dword s16, s[0:1], 0xe0
|
|
; GFX10-NEXT: s_load_dword s17, s[0:1], 0xf0
|
|
; GFX10-NEXT: s_load_dword s18, s[0:1], 0x100
|
|
; GFX10-NEXT: s_load_dword s19, s[0:1], 0x110
|
|
; GFX10-NEXT: s_load_dword s20, s[0:1], 0x120
|
|
; GFX10-NEXT: s_load_dword s21, s[0:1], 0x130
|
|
; GFX10-NEXT: s_load_dword s22, s[0:1], 0x140
|
|
; GFX10-NEXT: s_load_dword s23, s[0:1], 0x150
|
|
; GFX10-NEXT: s_load_dword s24, s[0:1], 0x160
|
|
; GFX10-NEXT: s_load_dword s25, s[0:1], 0x170
|
|
; GFX10-NEXT: s_load_dword s26, s[0:1], 0x180
|
|
; GFX10-NEXT: s_load_dword s27, s[0:1], 0x190
|
|
; GFX10-NEXT: s_load_dword s28, s[0:1], 0x1a0
|
|
; GFX10-NEXT: s_load_dword s29, s[0:1], 0x1b0
|
|
; GFX10-NEXT: s_load_dword s30, s[0:1], 0x1c0
|
|
; GFX10-NEXT: s_load_dword s31, s[0:1], 0x1d0
|
|
; GFX10-NEXT: s_load_dword s33, s[0:1], 0x1e0
|
|
; GFX10-NEXT: s_load_dword s34, s[0:1], 0x1f0
|
|
; GFX10-NEXT: s_load_dword s35, s[0:1], 0x200
|
|
; GFX10-NEXT: s_load_dword s36, s[0:1], 0x210
|
|
; GFX10-NEXT: s_load_dword s37, s[0:1], 0x220
|
|
; GFX10-NEXT: s_load_dword s38, s[0:1], 0x230
|
|
; GFX10-NEXT: s_load_dword s39, s[0:1], 0x240
|
|
; GFX10-NEXT: s_load_dword s40, s[0:1], 0x250
|
|
; GFX10-NEXT: s_load_dword s41, s[0:1], 0x260
|
|
; GFX10-NEXT: s_load_dword s42, s[0:1], 0x270
|
|
; GFX10-NEXT: s_load_dword s43, s[0:1], 0x280
|
|
; GFX10-NEXT: s_load_dword s44, s[0:1], 0x290
|
|
; GFX10-NEXT: s_load_dword s45, s[0:1], 0x2a0
|
|
; GFX10-NEXT: s_load_dword s46, s[0:1], 0x2b0
|
|
; GFX10-NEXT: s_load_dword s47, s[0:1], 0x2c0
|
|
; GFX10-NEXT: s_load_dword s48, s[0:1], 0x2d0
|
|
; GFX10-NEXT: s_load_dword s49, s[0:1], 0x2e0
|
|
; GFX10-NEXT: s_load_dword s50, s[0:1], 0x2f0
|
|
; GFX10-NEXT: s_load_dword s51, s[0:1], 0x300
|
|
; GFX10-NEXT: s_load_dword s52, s[0:1], 0x310
|
|
; GFX10-NEXT: s_load_dword s53, s[0:1], 0x320
|
|
; GFX10-NEXT: s_load_dword s54, s[0:1], 0x330
|
|
; GFX10-NEXT: s_load_dword s55, s[0:1], 0x340
|
|
; GFX10-NEXT: s_load_dword s56, s[0:1], 0x350
|
|
; GFX10-NEXT: s_load_dword s57, s[0:1], 0x360
|
|
; GFX10-NEXT: s_load_dword s58, s[0:1], 0x370
|
|
; GFX10-NEXT: s_load_dword s59, s[0:1], 0x380
|
|
; GFX10-NEXT: s_load_dword s60, s[0:1], 0x390
|
|
; GFX10-NEXT: s_load_dword s61, s[0:1], 0x3a0
|
|
; GFX10-NEXT: s_load_dword s62, s[0:1], 0x3b0
|
|
; GFX10-NEXT: s_load_dword s63, s[0:1], 0x3c0
|
|
; GFX10-NEXT: s_load_dword s64, s[0:1], 0x3d0
|
|
; GFX10-NEXT: s_load_dword s65, s[0:1], 0x3e0
|
|
; GFX10-NEXT: s_clause 0x2
|
|
; GFX10-NEXT: s_load_dword s66, s[0:1], 0x3f0
|
|
; GFX10-NEXT: s_load_dword s67, s[0:1], 0x400
|
|
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x410
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s2
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s3
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s4
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s5
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s6
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s7
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s8
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s9
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s10
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s11
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s12
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s13
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s14
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s15
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s16
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s17
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s18
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s19
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s20
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s21
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s22
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s23
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s24
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s25
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s26
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s27
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s28
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s29
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s30
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s31
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s33
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s34
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s35
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s36
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s37
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s38
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s39
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s40
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s41
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s42
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s43
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s44
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s45
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s46
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s47
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s48
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s49
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s50
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s51
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s52
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s53
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s54
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s55
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s56
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s57
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s58
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s59
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s60
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s61
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s62
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s63
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s64
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s65
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s66
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s67
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: ;;#ASMSTART
|
|
; GFX10-NEXT: ; use s0
|
|
; GFX10-NEXT: ;;#ASMEND
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: long_load_chain:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1f
|
|
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0
|
|
; GFX11-NEXT: s_load_b32 s3, s[0:1], 0x10
|
|
; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x20
|
|
; GFX11-NEXT: s_load_b32 s5, s[0:1], 0x30
|
|
; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x40
|
|
; GFX11-NEXT: s_load_b32 s7, s[0:1], 0x50
|
|
; GFX11-NEXT: s_load_b32 s8, s[0:1], 0x60
|
|
; GFX11-NEXT: s_load_b32 s9, s[0:1], 0x70
|
|
; GFX11-NEXT: s_load_b32 s10, s[0:1], 0x80
|
|
; GFX11-NEXT: s_load_b32 s11, s[0:1], 0x90
|
|
; GFX11-NEXT: s_load_b32 s12, s[0:1], 0xa0
|
|
; GFX11-NEXT: s_load_b32 s13, s[0:1], 0xb0
|
|
; GFX11-NEXT: s_load_b32 s14, s[0:1], 0xc0
|
|
; GFX11-NEXT: s_load_b32 s15, s[0:1], 0xd0
|
|
; GFX11-NEXT: s_load_b32 s16, s[0:1], 0xe0
|
|
; GFX11-NEXT: s_load_b32 s17, s[0:1], 0xf0
|
|
; GFX11-NEXT: s_load_b32 s18, s[0:1], 0x100
|
|
; GFX11-NEXT: s_load_b32 s19, s[0:1], 0x110
|
|
; GFX11-NEXT: s_load_b32 s20, s[0:1], 0x120
|
|
; GFX11-NEXT: s_load_b32 s21, s[0:1], 0x130
|
|
; GFX11-NEXT: s_load_b32 s22, s[0:1], 0x140
|
|
; GFX11-NEXT: s_load_b32 s23, s[0:1], 0x150
|
|
; GFX11-NEXT: s_load_b32 s24, s[0:1], 0x160
|
|
; GFX11-NEXT: s_load_b32 s25, s[0:1], 0x170
|
|
; GFX11-NEXT: s_load_b32 s26, s[0:1], 0x180
|
|
; GFX11-NEXT: s_load_b32 s27, s[0:1], 0x190
|
|
; GFX11-NEXT: s_load_b32 s28, s[0:1], 0x1a0
|
|
; GFX11-NEXT: s_load_b32 s29, s[0:1], 0x1b0
|
|
; GFX11-NEXT: s_load_b32 s30, s[0:1], 0x1c0
|
|
; GFX11-NEXT: s_load_b32 s31, s[0:1], 0x1d0
|
|
; GFX11-NEXT: s_load_b32 s33, s[0:1], 0x1e0
|
|
; GFX11-NEXT: s_load_b32 s34, s[0:1], 0x1f0
|
|
; GFX11-NEXT: s_clause 0x1f
|
|
; GFX11-NEXT: s_load_b32 s35, s[0:1], 0x200
|
|
; GFX11-NEXT: s_load_b32 s36, s[0:1], 0x210
|
|
; GFX11-NEXT: s_load_b32 s37, s[0:1], 0x220
|
|
; GFX11-NEXT: s_load_b32 s38, s[0:1], 0x230
|
|
; GFX11-NEXT: s_load_b32 s39, s[0:1], 0x240
|
|
; GFX11-NEXT: s_load_b32 s40, s[0:1], 0x250
|
|
; GFX11-NEXT: s_load_b32 s41, s[0:1], 0x260
|
|
; GFX11-NEXT: s_load_b32 s42, s[0:1], 0x270
|
|
; GFX11-NEXT: s_load_b32 s43, s[0:1], 0x280
|
|
; GFX11-NEXT: s_load_b32 s44, s[0:1], 0x290
|
|
; GFX11-NEXT: s_load_b32 s45, s[0:1], 0x2a0
|
|
; GFX11-NEXT: s_load_b32 s46, s[0:1], 0x2b0
|
|
; GFX11-NEXT: s_load_b32 s47, s[0:1], 0x2c0
|
|
; GFX11-NEXT: s_load_b32 s48, s[0:1], 0x2d0
|
|
; GFX11-NEXT: s_load_b32 s49, s[0:1], 0x2e0
|
|
; GFX11-NEXT: s_load_b32 s50, s[0:1], 0x2f0
|
|
; GFX11-NEXT: s_load_b32 s51, s[0:1], 0x300
|
|
; GFX11-NEXT: s_load_b32 s52, s[0:1], 0x310
|
|
; GFX11-NEXT: s_load_b32 s53, s[0:1], 0x320
|
|
; GFX11-NEXT: s_load_b32 s54, s[0:1], 0x330
|
|
; GFX11-NEXT: s_load_b32 s55, s[0:1], 0x340
|
|
; GFX11-NEXT: s_load_b32 s56, s[0:1], 0x350
|
|
; GFX11-NEXT: s_load_b32 s57, s[0:1], 0x360
|
|
; GFX11-NEXT: s_load_b32 s58, s[0:1], 0x370
|
|
; GFX11-NEXT: s_load_b32 s59, s[0:1], 0x380
|
|
; GFX11-NEXT: s_load_b32 s60, s[0:1], 0x390
|
|
; GFX11-NEXT: s_load_b32 s61, s[0:1], 0x3a0
|
|
; GFX11-NEXT: s_load_b32 s62, s[0:1], 0x3b0
|
|
; GFX11-NEXT: s_load_b32 s63, s[0:1], 0x3c0
|
|
; GFX11-NEXT: s_load_b32 s64, s[0:1], 0x3d0
|
|
; GFX11-NEXT: s_load_b32 s65, s[0:1], 0x3e0
|
|
; GFX11-NEXT: s_load_b32 s66, s[0:1], 0x3f0
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: s_load_b32 s67, s[0:1], 0x400
|
|
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x410
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s2
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s3
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s4
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s5
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s6
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s7
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s8
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s9
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s10
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s11
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s12
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s13
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s14
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s15
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s16
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s17
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s18
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s19
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s20
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s21
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s22
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s23
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s24
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s25
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s26
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s27
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s28
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s29
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s30
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s31
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s33
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s34
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s35
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s36
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s37
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s38
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s39
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s40
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s41
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s42
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s43
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s44
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s45
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s46
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s47
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s48
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s49
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s50
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s51
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s52
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s53
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s54
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s55
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s56
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s57
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s58
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s59
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s60
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s61
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s62
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s63
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s64
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s65
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s66
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s67
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: ; use s0
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: long_load_chain:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_clause 0x1f
|
|
; GFX12-NEXT: s_load_b32 s2, s[0:1], 0x0
|
|
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x10
|
|
; GFX12-NEXT: s_load_b32 s4, s[0:1], 0x20
|
|
; GFX12-NEXT: s_load_b32 s5, s[0:1], 0x30
|
|
; GFX12-NEXT: s_load_b32 s6, s[0:1], 0x40
|
|
; GFX12-NEXT: s_load_b32 s7, s[0:1], 0x50
|
|
; GFX12-NEXT: s_load_b32 s8, s[0:1], 0x60
|
|
; GFX12-NEXT: s_load_b32 s9, s[0:1], 0x70
|
|
; GFX12-NEXT: s_load_b32 s10, s[0:1], 0x80
|
|
; GFX12-NEXT: s_load_b32 s11, s[0:1], 0x90
|
|
; GFX12-NEXT: s_load_b32 s12, s[0:1], 0xa0
|
|
; GFX12-NEXT: s_load_b32 s13, s[0:1], 0xb0
|
|
; GFX12-NEXT: s_load_b32 s14, s[0:1], 0xc0
|
|
; GFX12-NEXT: s_load_b32 s15, s[0:1], 0xd0
|
|
; GFX12-NEXT: s_load_b32 s16, s[0:1], 0xe0
|
|
; GFX12-NEXT: s_load_b32 s17, s[0:1], 0xf0
|
|
; GFX12-NEXT: s_load_b32 s18, s[0:1], 0x100
|
|
; GFX12-NEXT: s_load_b32 s19, s[0:1], 0x110
|
|
; GFX12-NEXT: s_load_b32 s20, s[0:1], 0x120
|
|
; GFX12-NEXT: s_load_b32 s21, s[0:1], 0x130
|
|
; GFX12-NEXT: s_load_b32 s22, s[0:1], 0x140
|
|
; GFX12-NEXT: s_load_b32 s23, s[0:1], 0x150
|
|
; GFX12-NEXT: s_load_b32 s24, s[0:1], 0x160
|
|
; GFX12-NEXT: s_load_b32 s25, s[0:1], 0x170
|
|
; GFX12-NEXT: s_load_b32 s26, s[0:1], 0x180
|
|
; GFX12-NEXT: s_load_b32 s27, s[0:1], 0x190
|
|
; GFX12-NEXT: s_load_b32 s28, s[0:1], 0x1a0
|
|
; GFX12-NEXT: s_load_b32 s29, s[0:1], 0x1b0
|
|
; GFX12-NEXT: s_load_b32 s30, s[0:1], 0x1c0
|
|
; GFX12-NEXT: s_load_b32 s31, s[0:1], 0x1d0
|
|
; GFX12-NEXT: s_load_b32 s33, s[0:1], 0x1e0
|
|
; GFX12-NEXT: s_load_b32 s34, s[0:1], 0x1f0
|
|
; GFX12-NEXT: s_clause 0x1f
|
|
; GFX12-NEXT: s_load_b32 s35, s[0:1], 0x200
|
|
; GFX12-NEXT: s_load_b32 s36, s[0:1], 0x210
|
|
; GFX12-NEXT: s_load_b32 s37, s[0:1], 0x220
|
|
; GFX12-NEXT: s_load_b32 s38, s[0:1], 0x230
|
|
; GFX12-NEXT: s_load_b32 s39, s[0:1], 0x240
|
|
; GFX12-NEXT: s_load_b32 s40, s[0:1], 0x250
|
|
; GFX12-NEXT: s_load_b32 s41, s[0:1], 0x260
|
|
; GFX12-NEXT: s_load_b32 s42, s[0:1], 0x270
|
|
; GFX12-NEXT: s_load_b32 s43, s[0:1], 0x280
|
|
; GFX12-NEXT: s_load_b32 s44, s[0:1], 0x290
|
|
; GFX12-NEXT: s_load_b32 s45, s[0:1], 0x2a0
|
|
; GFX12-NEXT: s_load_b32 s46, s[0:1], 0x2b0
|
|
; GFX12-NEXT: s_load_b32 s47, s[0:1], 0x2c0
|
|
; GFX12-NEXT: s_load_b32 s48, s[0:1], 0x2d0
|
|
; GFX12-NEXT: s_load_b32 s49, s[0:1], 0x2e0
|
|
; GFX12-NEXT: s_load_b32 s50, s[0:1], 0x2f0
|
|
; GFX12-NEXT: s_load_b32 s51, s[0:1], 0x300
|
|
; GFX12-NEXT: s_load_b32 s52, s[0:1], 0x310
|
|
; GFX12-NEXT: s_load_b32 s53, s[0:1], 0x320
|
|
; GFX12-NEXT: s_load_b32 s54, s[0:1], 0x330
|
|
; GFX12-NEXT: s_load_b32 s55, s[0:1], 0x340
|
|
; GFX12-NEXT: s_load_b32 s56, s[0:1], 0x350
|
|
; GFX12-NEXT: s_load_b32 s57, s[0:1], 0x360
|
|
; GFX12-NEXT: s_load_b32 s58, s[0:1], 0x370
|
|
; GFX12-NEXT: s_load_b32 s59, s[0:1], 0x380
|
|
; GFX12-NEXT: s_load_b32 s60, s[0:1], 0x390
|
|
; GFX12-NEXT: s_load_b32 s61, s[0:1], 0x3a0
|
|
; GFX12-NEXT: s_load_b32 s62, s[0:1], 0x3b0
|
|
; GFX12-NEXT: s_load_b32 s63, s[0:1], 0x3c0
|
|
; GFX12-NEXT: s_load_b32 s64, s[0:1], 0x3d0
|
|
; GFX12-NEXT: s_load_b32 s65, s[0:1], 0x3e0
|
|
; GFX12-NEXT: s_load_b32 s66, s[0:1], 0x3f0
|
|
; GFX12-NEXT: s_clause 0x1
|
|
; GFX12-NEXT: s_load_b32 s67, s[0:1], 0x400
|
|
; GFX12-NEXT: s_load_b32 s0, s[0:1], 0x410
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s2
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s3
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s4
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s5
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s6
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s7
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s8
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s9
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s10
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s11
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s12
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s13
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s14
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s15
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s16
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s17
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s18
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s19
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s20
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s21
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s22
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s23
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s24
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s25
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s26
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s27
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s28
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s29
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s30
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s31
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s33
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s34
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s35
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s36
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s37
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s38
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s39
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s40
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s41
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s42
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s43
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s44
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s45
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s46
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s47
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s48
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s49
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s50
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s51
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s52
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s53
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s54
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s55
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s56
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s57
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s58
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s59
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s60
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s61
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s62
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s63
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s64
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s65
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s66
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s67
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: ;;#ASMSTART
|
|
; GFX12-NEXT: ; use s0
|
|
; GFX12-NEXT: ;;#ASMEND
|
|
; GFX12-NEXT: s_endpgm
|
|
%v0 = load i32, ptr addrspace(1) %p
|
|
%ptr1 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 1
|
|
%v1 = load i32, ptr addrspace(1) %ptr1
|
|
%ptr2 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 2
|
|
%v2 = load i32, ptr addrspace(1) %ptr2
|
|
%ptr3 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 3
|
|
%v3 = load i32, ptr addrspace(1) %ptr3
|
|
%ptr4 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 4
|
|
%v4 = load i32, ptr addrspace(1) %ptr4
|
|
%ptr5 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 5
|
|
%v5 = load i32, ptr addrspace(1) %ptr5
|
|
%ptr6 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 6
|
|
%v6 = load i32, ptr addrspace(1) %ptr6
|
|
%ptr7 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 7
|
|
%v7 = load i32, ptr addrspace(1) %ptr7
|
|
%ptr8 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 8
|
|
%v8 = load i32, ptr addrspace(1) %ptr8
|
|
%ptr9 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 9
|
|
%v9 = load i32, ptr addrspace(1) %ptr9
|
|
%ptr10 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 10
|
|
%v10 = load i32, ptr addrspace(1) %ptr10
|
|
%ptr11 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 11
|
|
%v11 = load i32, ptr addrspace(1) %ptr11
|
|
%ptr12 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 12
|
|
%v12 = load i32, ptr addrspace(1) %ptr12
|
|
%ptr13 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 13
|
|
%v13 = load i32, ptr addrspace(1) %ptr13
|
|
%ptr14 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 14
|
|
%v14 = load i32, ptr addrspace(1) %ptr14
|
|
%ptr15 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 15
|
|
%v15 = load i32, ptr addrspace(1) %ptr15
|
|
%ptr16 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 16
|
|
%v16 = load i32, ptr addrspace(1) %ptr16
|
|
%ptr17 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 17
|
|
%v17 = load i32, ptr addrspace(1) %ptr17
|
|
%ptr18 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 18
|
|
%v18 = load i32, ptr addrspace(1) %ptr18
|
|
%ptr19 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 19
|
|
%v19 = load i32, ptr addrspace(1) %ptr19
|
|
%ptr20 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 20
|
|
%v20 = load i32, ptr addrspace(1) %ptr20
|
|
%ptr21 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 21
|
|
%v21 = load i32, ptr addrspace(1) %ptr21
|
|
%ptr22 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 22
|
|
%v22 = load i32, ptr addrspace(1) %ptr22
|
|
%ptr23 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 23
|
|
%v23 = load i32, ptr addrspace(1) %ptr23
|
|
%ptr24 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 24
|
|
%v24 = load i32, ptr addrspace(1) %ptr24
|
|
%ptr25 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 25
|
|
%v25 = load i32, ptr addrspace(1) %ptr25
|
|
%ptr26 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 26
|
|
%v26 = load i32, ptr addrspace(1) %ptr26
|
|
%ptr27 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 27
|
|
%v27 = load i32, ptr addrspace(1) %ptr27
|
|
%ptr28 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 28
|
|
%v28 = load i32, ptr addrspace(1) %ptr28
|
|
%ptr29 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 29
|
|
%v29 = load i32, ptr addrspace(1) %ptr29
|
|
%ptr30 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 30
|
|
%v30 = load i32, ptr addrspace(1) %ptr30
|
|
%ptr31 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 31
|
|
%v31 = load i32, ptr addrspace(1) %ptr31
|
|
%ptr32 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 32
|
|
%v32 = load i32, ptr addrspace(1) %ptr32
|
|
%ptr33 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 33
|
|
%v33 = load i32, ptr addrspace(1) %ptr33
|
|
%ptr34 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 34
|
|
%v34 = load i32, ptr addrspace(1) %ptr34
|
|
%ptr35 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 35
|
|
%v35 = load i32, ptr addrspace(1) %ptr35
|
|
%ptr36 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 36
|
|
%v36 = load i32, ptr addrspace(1) %ptr36
|
|
%ptr37 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 37
|
|
%v37 = load i32, ptr addrspace(1) %ptr37
|
|
%ptr38 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 38
|
|
%v38 = load i32, ptr addrspace(1) %ptr38
|
|
%ptr39 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 39
|
|
%v39 = load i32, ptr addrspace(1) %ptr39
|
|
%ptr40 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 40
|
|
%v40 = load i32, ptr addrspace(1) %ptr40
|
|
%ptr41 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 41
|
|
%v41 = load i32, ptr addrspace(1) %ptr41
|
|
%ptr42 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 42
|
|
%v42 = load i32, ptr addrspace(1) %ptr42
|
|
%ptr43 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 43
|
|
%v43 = load i32, ptr addrspace(1) %ptr43
|
|
%ptr44 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 44
|
|
%v44 = load i32, ptr addrspace(1) %ptr44
|
|
%ptr45 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 45
|
|
%v45 = load i32, ptr addrspace(1) %ptr45
|
|
%ptr46 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 46
|
|
%v46 = load i32, ptr addrspace(1) %ptr46
|
|
%ptr47 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 47
|
|
%v47 = load i32, ptr addrspace(1) %ptr47
|
|
%ptr48 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 48
|
|
%v48 = load i32, ptr addrspace(1) %ptr48
|
|
%ptr49 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 49
|
|
%v49 = load i32, ptr addrspace(1) %ptr49
|
|
%ptr50 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 50
|
|
%v50 = load i32, ptr addrspace(1) %ptr50
|
|
%ptr51 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 51
|
|
%v51 = load i32, ptr addrspace(1) %ptr51
|
|
%ptr52 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 52
|
|
%v52 = load i32, ptr addrspace(1) %ptr52
|
|
%ptr53 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 53
|
|
%v53 = load i32, ptr addrspace(1) %ptr53
|
|
%ptr54 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 54
|
|
%v54 = load i32, ptr addrspace(1) %ptr54
|
|
%ptr55 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 55
|
|
%v55 = load i32, ptr addrspace(1) %ptr55
|
|
%ptr56 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 56
|
|
%v56 = load i32, ptr addrspace(1) %ptr56
|
|
%ptr57 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 57
|
|
%v57 = load i32, ptr addrspace(1) %ptr57
|
|
%ptr58 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 58
|
|
%v58 = load i32, ptr addrspace(1) %ptr58
|
|
%ptr59 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 59
|
|
%v59 = load i32, ptr addrspace(1) %ptr59
|
|
%ptr60 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 60
|
|
%v60 = load i32, ptr addrspace(1) %ptr60
|
|
%ptr61 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 61
|
|
%v61 = load i32, ptr addrspace(1) %ptr61
|
|
%ptr62 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 62
|
|
%v62 = load i32, ptr addrspace(1) %ptr62
|
|
%ptr63 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 63
|
|
%v63 = load i32, ptr addrspace(1) %ptr63
|
|
%ptr64 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 64
|
|
%v64 = load i32, ptr addrspace(1) %ptr64
|
|
%ptr65 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 65
|
|
%v65 = load i32, ptr addrspace(1) %ptr65
|
|
|
|
call void asm sideeffect "; use $0", "s"(i32 %v0)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v1)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v2)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v3)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v4)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v5)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v6)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v7)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v8)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v9)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v10)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v11)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v12)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v13)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v14)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v15)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v16)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v17)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v18)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v19)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v20)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v21)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v22)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v23)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v24)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v25)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v26)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v27)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v28)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v29)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v30)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v31)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v32)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v33)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v34)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v35)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v36)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v37)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v38)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v39)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v40)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v41)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v42)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v43)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v44)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v45)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v46)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v47)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v48)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v49)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v50)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v51)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v52)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v53)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v54)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v55)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v56)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v57)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v58)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v59)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v60)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v61)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v62)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v63)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v64)
|
|
call void asm sideeffect "; use $0", "s"(i32 %v65)
|
|
|
|
ret void
|
|
}
|