Currently, the custom SGPR spill lowering pass spills SGPRs into physical VGPR lanes, and the remaining VGPRs are used by regalloc for vector regclass allocation. This imposes many restrictions, and SGPR spilling ends up being unsuccessful when there are not enough VGPRs, forcing us to spill the leftover into memory during PEI. The custom spill handling during PEI has many edge cases and breaks the compiler from time to time. This patch implements spilling SGPRs into virtual VGPR lanes. Since we now split the register allocation for SGPRs and VGPRs, the virtual registers introduced for the spill lanes get allocated automatically in the subsequent regalloc invocation for VGPRs. Spilling to virtual registers will always be successful, even in high-pressure situations, and hence it avoids most of the edge cases during PEI. We are now left with only the custom SGPR spills during PEI for special registers like the frame pointer, which is an unproblematic case. This patch also implements the whole-wave spills which might occur if RA spills any live range of virtual registers involved in whole-wave operations. Earlier, we had been hand-picking registers for such machine operands. But now, with SGPR spills into virtual VGPR lanes, we are exposing them to the allocator. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D124196
921 lines
46 KiB
LLVM
921 lines
46 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -greedy-regclass-priority-trumps-globalness=1 -o - %s | FileCheck -check-prefixes=GFX90A,GLOBALNESS1 %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -greedy-regclass-priority-trumps-globalness=0 -o - %s | FileCheck -check-prefixes=GFX90A,GLOBALNESS0 %s
|
|
|
|
declare void @wobble()
|
|
|
|
; @widget is an internal fastcc function whose only work is a tail-marked call
; to the external @wobble, followed by `unreachable`.
;
; The expected gfx90a assembly below (shared by both RUN configurations) shows:
;   * v0 and v40 being stored to scratch in the prologue while exec is
;     temporarily modified (s_xor_saveexec_b64 / s_mov_b64 exec, -1), with exec
;     restored from s[18:19] afterwards;
;   * the incoming s33 value, copied to s16, being written to lane 0 of v40;
;   * the address of @wobble being loaded through a gotpcrel32 relocation;
;   * s30 and s31 being written to lanes 0 and 1 of v0 right before the
;     s_swappc_b64 call (presumably the return-address pair -- TODO confirm).
; No epilogue is checked after the call, consistent with the IR ending in
; `unreachable`.
define internal fastcc void @widget() {
|
|
; GFX90A-LABEL: widget:
|
|
; GFX90A: ; %bb.0: ; %bb
|
|
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX90A-NEXT: s_mov_b32 s16, s33
|
|
; GFX90A-NEXT: s_mov_b32 s33, s32
|
|
; GFX90A-NEXT: s_xor_saveexec_b64 s[18:19], -1
|
|
; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; 4-byte Folded Spill
|
|
; GFX90A-NEXT: s_mov_b64 exec, -1
|
|
; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
|
; GFX90A-NEXT: s_mov_b64 exec, s[18:19]
|
|
; GFX90A-NEXT: s_addk_i32 s32, 0x400
|
|
; GFX90A-NEXT: v_writelane_b32 v40, s16, 0
|
|
; GFX90A-NEXT: s_getpc_b64 s[16:17]
|
|
; GFX90A-NEXT: s_add_u32 s16, s16, wobble@gotpcrel32@lo+4
|
|
; GFX90A-NEXT: s_addc_u32 s17, s17, wobble@gotpcrel32@hi+12
|
|
; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
|
|
; GFX90A-NEXT: ; implicit-def: $vgpr0
|
|
; GFX90A-NEXT: v_writelane_b32 v0, s30, 0
|
|
; GFX90A-NEXT: v_writelane_b32 v0, s31, 1
|
|
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
bb:
|
|
tail call void @wobble()
|
|
unreachable
|
|
}
|
|
|
|
define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, i32 %tmp5.i.i, i32 %tmp427.i, i1 %tmp438.i, double %tmp27.i, i1 %tmp48.i) {
|
|
; GLOBALNESS1-LABEL: kernel:
|
|
; GLOBALNESS1: ; %bb.0: ; %bb
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[54:55], s[6:7]
|
|
; GLOBALNESS1-NEXT: s_load_dwordx4 s[36:39], s[8:9], 0x0
|
|
; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
|
|
; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off
|
|
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GLOBALNESS1-NEXT: global_load_dword v0, v42, s[36:37]
|
|
; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[64:65], s[4:5]
|
|
; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
|
|
; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20
|
|
; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
|
; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, 0x40994400
|
|
; GLOBALNESS1-NEXT: s_bitcmp1_b32 s38, 0
|
|
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[40:41], s[4:5], v[42:43]
|
|
; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[42:43], s[4:5], 0
|
|
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GLOBALNESS1-NEXT: s_xor_b64 s[94:95], s[4:5], -1
|
|
; GLOBALNESS1-NEXT: s_bitcmp1_b32 s6, 0
|
|
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
|
|
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GLOBALNESS1-NEXT: s_xor_b64 s[88:89], s[4:5], -1
|
|
; GLOBALNESS1-NEXT: s_bitcmp1_b32 s7, 0
|
|
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GLOBALNESS1-NEXT: s_getpc_b64 s[6:7]
|
|
; GLOBALNESS1-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12
|
|
; GLOBALNESS1-NEXT: s_xor_b64 s[86:87], s[4:5], -1
|
|
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr40
|
|
; GLOBALNESS1-NEXT: s_load_dwordx2 s[66:67], s[6:7], 0x0
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s98, s16
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[62:63], s[8:9]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s99, s15
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s56, s14
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[92:93], 0x80
|
|
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[36:37], 1, v1
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s69, 0x3ff00000
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s32, 0
|
|
; GLOBALNESS1-NEXT: ; implicit-def: $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
|
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
|
|
; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
|
|
; GLOBALNESS1-NEXT: v_writelane_b32 v40, s4, 0
|
|
; GLOBALNESS1-NEXT: v_writelane_b32 v40, s5, 1
|
|
; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
|
|
; GLOBALNESS1-NEXT: v_writelane_b32 v40, s4, 2
|
|
; GLOBALNESS1-NEXT: v_writelane_b32 v40, s5, 3
|
|
; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
|
|
; GLOBALNESS1-NEXT: v_writelane_b32 v40, s4, 4
|
|
; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[90:91], 1, v0
|
|
; GLOBALNESS1-NEXT: v_writelane_b32 v40, s5, 5
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_4
|
|
; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v40, 4
|
|
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v40, 5
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29
|
|
; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow6
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0
|
|
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
|
|
; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow19
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a63, v31
|
|
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a62, v30
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a61, v29
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a60, v28
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a59, v27
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a58, v26
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a57, v25
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a56, v24
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a55, v23
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a54, v22
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a53, v21
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a52, v20
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a51, v19
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a50, v18
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a49, v17
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a48, v16
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a47, v15
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a46, v14
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a45, v13
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a44, v12
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a43, v11
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a42, v10
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a41, v9
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a40, v8
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a39, v7
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a38, v6
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a37, v5
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a36, v4
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a35, v3
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a34, v2
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a33, v1
|
|
; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a32, v0
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30
|
|
; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5
|
|
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
|
|
; GLOBALNESS1-NEXT: ; Child Loop BB1_15 Depth 2
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[92:93], s[92:93] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: flat_load_dword v44, v[0:1]
|
|
; GLOBALNESS1-NEXT: s_add_u32 s8, s62, 40
|
|
; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
|
|
; GLOBALNESS1-NEXT: flat_load_dword v45, v[0:1]
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s9, s63, 0
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[66:67]
|
|
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[36:37]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
|
|
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_8
|
|
; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_cmp_lt_i32 s39, 1
|
|
; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7
|
|
; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock3
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_cmp_lg_u32 s39, 1
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
|
|
; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
|
|
; GLOBALNESS1-NEXT: s_cbranch_execnz .LBB1_8
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_23
|
|
; GLOBALNESS1-NEXT: .LBB1_7: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0
|
|
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_23
|
|
; GLOBALNESS1-NEXT: .LBB1_8: ; %Flow16
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24
|
|
; GLOBALNESS1-NEXT: .LBB1_9: ; %baz.exit.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0
|
|
; GLOBALNESS1-NEXT: flat_load_dword v0, v[32:33]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s68, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s70, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s71, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s72, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s73, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s74, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s75, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s76, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s77, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s78, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s79, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s80, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s81, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s82, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s83, s69
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s84, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s85, s69
|
|
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[96:97], 0, v0
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[70:71], s[96:97]
|
|
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
|
|
; GLOBALNESS1-NEXT: ; %bb.10: ; %bb33.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[32:33], off
|
|
; GLOBALNESS1-NEXT: v_readlane_b32 s4, v40, 0
|
|
; GLOBALNESS1-NEXT: v_readlane_b32 s5, v40, 1
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[72:73], s[36:37]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s75, s39
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_12
|
|
; GLOBALNESS1-NEXT: ; %bb.11: ; %bb39.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
|
|
; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
|
|
; GLOBALNESS1-NEXT: .LBB1_12: ; %bb44.lr.ph.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v45
|
|
; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v44, vcc
|
|
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
|
|
; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e64 s[36:37], 0, v[0:1]
|
|
; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_15
|
|
; GLOBALNESS1-NEXT: .LBB1_13: ; %Flow7
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GLOBALNESS1-NEXT: .LBB1_14: ; %bb63.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[86:87]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25
|
|
; GLOBALNESS1-NEXT: .LBB1_15: ; %bb44.i
|
|
; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[94:95]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14
|
|
; GLOBALNESS1-NEXT: ; %bb.16: ; %bb46.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[88:89]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14
|
|
; GLOBALNESS1-NEXT: ; %bb.17: ; %bb50.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[40:41]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_20
|
|
; GLOBALNESS1-NEXT: ; %bb.18: ; %bb3.i.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[42:43]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_20
|
|
; GLOBALNESS1-NEXT: ; %bb.19: ; %bb6.i.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[36:37]
|
|
; GLOBALNESS1-NEXT: .LBB1_20: ; %spam.exit.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[90:91]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14
|
|
; GLOBALNESS1-NEXT: ; %bb.21: ; %bb55.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: s_add_u32 s60, s62, 40
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s61, s63, 0
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[60:61]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[66:67]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], 0, 0
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[60:61]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], a[32:33], off
|
|
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[66:67]
|
|
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[58:59]
|
|
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_13
|
|
; GLOBALNESS1-NEXT: ; %bb.22: ; %bb62.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_13
|
|
; GLOBALNESS1-NEXT: .LBB1_23: ; %LeafBlock
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_cmp_lg_u32 s39, 0
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
|
|
; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
|
|
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9
|
|
; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
|
|
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_3
|
|
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow14
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s36, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s37, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s38, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s39, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s40, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s41, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[42:43]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s42, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s43, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s44, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s45, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s46, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s47, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s48, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s49, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s50, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s51, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s52, s93
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s53, s93
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[40:41], s[40:41] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[42:43], s[42:43] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[44:45], s[44:45] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[46:47], s[46:47] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[48:49], s[48:49] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[50:51], s[50:51] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[52:53], s[52:53] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[54:55], s[54:55] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[56:57], s[56:57] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[58:59], s[58:59] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[60:61], s[60:61] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[62:63], s[62:63] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[64:65], s[64:65] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[66:67], s[66:67] op_sel:[0,1]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[42:43], s[6:7]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[4:5]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s39, s75
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[72:73]
|
|
; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow15
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[70:71]
|
|
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
|
|
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2
|
|
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_readlane_b32 s6, v40, 2
|
|
; GLOBALNESS1-NEXT: v_readlane_b32 s7, v40, 3
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0
|
|
; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[42:43], off
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_1
|
|
; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i
|
|
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0
|
|
; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[42:43], off
|
|
; GLOBALNESS1-NEXT: s_branch .LBB1_2
|
|
; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32
|
|
; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i
|
|
; GLOBALNESS1-NEXT: s_add_u32 s8, s62, 40
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s9, s63, 0
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
|
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
|
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
|
|
; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow
|
|
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34
|
|
; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i
|
|
; GLOBALNESS1-NEXT: s_add_u32 s8, s62, 40
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s9, s63, 0
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS1-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
|
|
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
|
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
|
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
|
|
;
|
|
; GLOBALNESS0-LABEL: kernel:
|
|
; GLOBALNESS0: ; %bb.0: ; %bb
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[6:7]
|
|
; GLOBALNESS0-NEXT: s_load_dwordx4 s[36:39], s[8:9], 0x0
|
|
; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
|
|
; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off
|
|
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GLOBALNESS0-NEXT: global_load_dword v0, v42, s[36:37]
|
|
; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[62:63], s[4:5]
|
|
; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
|
|
; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20
|
|
; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
|
; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, 0x40994400
|
|
; GLOBALNESS0-NEXT: s_bitcmp1_b32 s38, 0
|
|
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[40:41], s[4:5], v[42:43]
|
|
; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[42:43], s[4:5], 0
|
|
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GLOBALNESS0-NEXT: s_xor_b64 s[94:95], s[4:5], -1
|
|
; GLOBALNESS0-NEXT: s_bitcmp1_b32 s6, 0
|
|
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
|
|
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GLOBALNESS0-NEXT: s_xor_b64 s[88:89], s[4:5], -1
|
|
; GLOBALNESS0-NEXT: s_bitcmp1_b32 s7, 0
|
|
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GLOBALNESS0-NEXT: s_getpc_b64 s[6:7]
|
|
; GLOBALNESS0-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12
|
|
; GLOBALNESS0-NEXT: s_xor_b64 s[86:87], s[4:5], -1
|
|
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr40
|
|
; GLOBALNESS0-NEXT: s_load_dwordx2 s[66:67], s[6:7], 0x0
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s98, s16
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[60:61], s[8:9]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s99, s15
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s56, s14
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[92:93], 0x80
|
|
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[36:37], 1, v1
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s69, 0x3ff00000
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s32, 0
|
|
; GLOBALNESS0-NEXT: ; implicit-def: $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
|
|
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
|
|
; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0
|
|
; GLOBALNESS0-NEXT: v_writelane_b32 v40, s4, 0
|
|
; GLOBALNESS0-NEXT: v_writelane_b32 v40, s5, 1
|
|
; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
|
|
; GLOBALNESS0-NEXT: v_writelane_b32 v40, s4, 2
|
|
; GLOBALNESS0-NEXT: v_writelane_b32 v40, s5, 3
|
|
; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
|
|
; GLOBALNESS0-NEXT: v_writelane_b32 v40, s4, 4
|
|
; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[90:91], 1, v0
|
|
; GLOBALNESS0-NEXT: v_writelane_b32 v40, s5, 5
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_4
|
|
; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v40, 4
|
|
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v40, 5
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29
|
|
; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow6
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0
|
|
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
|
|
; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow19
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a63, v31
|
|
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a62, v30
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a61, v29
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a60, v28
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a59, v27
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a58, v26
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a57, v25
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a56, v24
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a55, v23
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a54, v22
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a53, v21
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a52, v20
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a51, v19
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a50, v18
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a49, v17
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a48, v16
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a47, v15
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a46, v14
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a45, v13
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a44, v12
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a43, v11
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a42, v10
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a41, v9
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a40, v8
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a39, v7
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a38, v6
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a37, v5
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a36, v4
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a35, v3
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a34, v2
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a33, v1
|
|
; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a32, v0
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30
|
|
; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5
|
|
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
|
|
; GLOBALNESS0-NEXT: ; Child Loop BB1_15 Depth 2
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[92:93], s[92:93] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: flat_load_dword v44, v[0:1]
|
|
; GLOBALNESS0-NEXT: s_add_u32 s8, s60, 40
|
|
; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
|
|
; GLOBALNESS0-NEXT: flat_load_dword v45, v[0:1]
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s9, s61, 0
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[66:67]
|
|
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[36:37]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
|
|
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_8
|
|
; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_cmp_lt_i32 s39, 1
|
|
; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7
|
|
; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock3
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_cmp_lg_u32 s39, 1
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
|
|
; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
|
|
; GLOBALNESS0-NEXT: s_cbranch_execnz .LBB1_8
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_23
|
|
; GLOBALNESS0-NEXT: .LBB1_7: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0
|
|
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_23
|
|
; GLOBALNESS0-NEXT: .LBB1_8: ; %Flow16
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24
|
|
; GLOBALNESS0-NEXT: .LBB1_9: ; %baz.exit.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0
|
|
; GLOBALNESS0-NEXT: flat_load_dword v0, v[32:33]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s68, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s70, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s71, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s72, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s73, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s74, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s75, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s76, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s77, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s78, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s79, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s80, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s81, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s82, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s83, s69
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s84, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s85, s69
|
|
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[96:97], 0, v0
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[70:71], s[96:97]
|
|
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
|
|
; GLOBALNESS0-NEXT: ; %bb.10: ; %bb33.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[32:33], off
|
|
; GLOBALNESS0-NEXT: v_readlane_b32 s4, v40, 0
|
|
; GLOBALNESS0-NEXT: v_readlane_b32 s5, v40, 1
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[72:73], s[36:37]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s75, s39
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_12
|
|
; GLOBALNESS0-NEXT: ; %bb.11: ; %bb39.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
|
|
; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off
|
|
; GLOBALNESS0-NEXT: .LBB1_12: ; %bb44.lr.ph.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v45
|
|
; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v44, vcc
|
|
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
|
|
; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e64 s[36:37], 0, v[0:1]
|
|
; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_15
|
|
; GLOBALNESS0-NEXT: .LBB1_13: ; %Flow7
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GLOBALNESS0-NEXT: .LBB1_14: ; %bb63.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[86:87]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25
|
|
; GLOBALNESS0-NEXT: .LBB1_15: ; %bb44.i
|
|
; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[94:95]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14
|
|
; GLOBALNESS0-NEXT: ; %bb.16: ; %bb46.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[88:89]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14
|
|
; GLOBALNESS0-NEXT: ; %bb.17: ; %bb50.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[40:41]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_20
|
|
; GLOBALNESS0-NEXT: ; %bb.18: ; %bb3.i.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[42:43]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_20
|
|
; GLOBALNESS0-NEXT: ; %bb.19: ; %bb6.i.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[36:37]
|
|
; GLOBALNESS0-NEXT: .LBB1_20: ; %spam.exit.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[90:91]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14
|
|
; GLOBALNESS0-NEXT: ; %bb.21: ; %bb55.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: s_add_u32 s64, s60, 40
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s65, s61, 0
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[64:65]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[66:67]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], 0, 0
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[64:65]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], a[32:33], off
|
|
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[66:67]
|
|
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[58:59]
|
|
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_13
|
|
; GLOBALNESS0-NEXT: ; %bb.22: ; %bb62.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_13
|
|
; GLOBALNESS0-NEXT: .LBB1_23: ; %LeafBlock
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_cmp_lg_u32 s39, 0
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
|
|
; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
|
|
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9
|
|
; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
|
|
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_3
|
|
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow14
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s36, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s37, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s38, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s39, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s40, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s41, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[42:43]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s42, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s43, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s44, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s45, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s46, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s47, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s48, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s49, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s50, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s51, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s52, s93
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s53, s93
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[40:41], s[40:41] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[42:43], s[42:43] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[44:45], s[44:45] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[46:47], s[46:47] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[48:49], s[48:49] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[50:51], s[50:51] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[52:53], s[52:53] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[54:55], s[54:55] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[56:57], s[56:57] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[58:59], s[58:59] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[60:61], s[60:61] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[62:63], s[62:63] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[64:65], s[64:65] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[66:67], s[66:67] op_sel:[0,1]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[42:43], s[6:7]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[40:41], s[4:5]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s39, s75
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[72:73]
|
|
; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow15
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[70:71]
|
|
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
|
|
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2
|
|
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_readlane_b32 s6, v40, 2
|
|
; GLOBALNESS0-NEXT: v_readlane_b32 s7, v40, 3
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0
|
|
; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[42:43], off
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_1
|
|
; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i
|
|
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42
|
|
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0
|
|
; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[42:43], off
|
|
; GLOBALNESS0-NEXT: s_branch .LBB1_2
|
|
; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32
|
|
; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i
|
|
; GLOBALNESS0-NEXT: s_add_u32 s8, s60, 40
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s9, s61, 0
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
|
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
|
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
|
|
; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow
|
|
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34
|
|
; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i
|
|
; GLOBALNESS0-NEXT: s_add_u32 s8, s60, 40
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s9, s61, 0
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55]
|
|
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s12, s56
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s13, s99
|
|
; GLOBALNESS0-NEXT: s_mov_b32 s14, s98
|
|
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
|
|
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
|
|
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
|
|
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
|
|
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
|
|
bb:
|
|
store i32 0, i32 addrspace(1)* null, align 4
|
|
%tmp4 = load i32, i32 addrspace(1)* %arg1.global, align 4
|
|
br label %bb5
|
|
|
|
bb5: ; preds = %bb5.backedge, %bb
|
|
%tmp4.i.sroa.0.0 = phi <9 x double> [ undef, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ]
|
|
%tmp14.1.i = load i32, i32* inttoptr (i64 128 to i32*), align 128
|
|
store i32 0, i32 addrspace(5)* null, align 4
|
|
%tmp14.2.i = load i32, i32* inttoptr (i64 128 to i32*), align 128
|
|
%tmp15.2.i = icmp eq i32 %tmp14.2.i, 0
|
|
%spec.select.2.i = select i1 %tmp15.2.i, i32 0, i32 %tmp14.1.i
|
|
tail call void @wobble()
|
|
br i1 %tmp3.i.i, label %bb4.i.i, label %baz.exit.i
|
|
|
|
bb4.i.i: ; preds = %bb5
|
|
switch i32 %tmp5.i.i, label %baz.exit.i [
|
|
i32 0, label %bb7.i.i
|
|
i32 1, label %bb11.i.i
|
|
]
|
|
|
|
bb7.i.i: ; preds = %bb4.i.i
|
|
tail call fastcc void @widget()
|
|
unreachable
|
|
|
|
bb11.i.i: ; preds = %bb4.i.i
|
|
tail call fastcc void @widget()
|
|
unreachable
|
|
|
|
baz.exit.i: ; preds = %bb4.i.i, %bb5
|
|
%tmp26.i = load i32, i32* null, align 4
|
|
%tmp27.i4 = load double, double addrspace(1)* null, align 8
|
|
%tmp31.i = icmp slt i32 %tmp26.i, 0
|
|
br i1 %tmp31.i, label %bb33.i, label %bb64.i
|
|
|
|
bb33.i: ; preds = %baz.exit.i
|
|
%tmp38.i = icmp slt i32 %tmp4, 0
|
|
br i1 %tmp38.i, label %bb39.i, label %bb44.lr.ph.i
|
|
|
|
bb39.i: ; preds = %bb33.i
|
|
store double 0.000000e+00, double addrspace(1)* null, align 8
|
|
br label %bb44.lr.ph.i
|
|
|
|
bb44.lr.ph.i: ; preds = %bb39.i, %bb33.i
|
|
br label %bb44.i
|
|
|
|
bb44.i: ; preds = %bb63.i, %bb44.lr.ph.i
|
|
br i1 %tmp3.i.i, label %bb63.i, label %bb46.i
|
|
|
|
bb46.i: ; preds = %bb44.i
|
|
br i1 %tmp438.i, label %bb63.i, label %bb50.i
|
|
|
|
bb50.i: ; preds = %bb46.i
|
|
switch i32 0, label %spam.exit.i [
|
|
i32 0, label %bb1.i.i
|
|
]
|
|
|
|
bb1.i.i: ; preds = %bb50.i
|
|
%tmp2.i.i = fcmp ogt double %tmp27.i, 1.617000e+03
|
|
br i1 %tmp2.i.i, label %spam.exit.i, label %bb3.i.i
|
|
|
|
bb3.i.i: ; preds = %bb1.i.i
|
|
%tmp4.i.i = fcmp ogt double %tmp27.i, 0.000000e+00
|
|
br i1 %tmp4.i.i, label %spam.exit.i, label %bb6.i.i
|
|
|
|
bb6.i.i: ; preds = %bb3.i.i
|
|
%tmp7.i.i = fcmp ogt double %tmp27.i4, 0.000000e+00
|
|
br i1 %tmp7.i.i, label %spam.exit.i, label %bb8.i.i
|
|
|
|
bb8.i.i: ; preds = %bb6.i.i
|
|
tail call void null()
|
|
br label %spam.exit.i
|
|
|
|
spam.exit.i: ; preds = %bb8.i.i, %bb6.i.i, %bb3.i.i, %bb1.i.i, %bb50.i
|
|
%tmp22.i = icmp sgt i32 %tmp4, 0
|
|
br i1 %tmp22.i, label %bb63.i, label %bb55.i
|
|
|
|
bb55.i: ; preds = %spam.exit.i
|
|
tail call void @wobble()
|
|
%tmp0 = extractelement <9 x double> %tmp4.i.sroa.0.0, i32 0
|
|
store double %tmp0, double addrspace(1)* null, align 8
|
|
tail call void @wobble()
|
|
%tmp61.i = icmp eq i32 %spec.select.2.i, 0
|
|
br i1 %tmp61.i, label %bb62.i, label %bb63.i
|
|
|
|
bb62.i: ; preds = %bb55.i
|
|
store double 0.000000e+00, double addrspace(1)* null, align 8
|
|
br label %bb63.i
|
|
|
|
bb63.i: ; preds = %bb62.i, %bb55.i, %spam.exit.i, %bb46.i, %bb44.i
|
|
br i1 %tmp48.i, label %bb44.i, label %bb64.i
|
|
|
|
bb64.i: ; preds = %bb63.i, %baz.exit.i
|
|
%tmp4.i.sroa.0.1 = phi <9 x double> [ <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %baz.exit.i ], [ zeroinitializer, %bb63.i ]
|
|
br i1 %tmp31.i, label %bb67.i, label %bb5.backedge
|
|
|
|
bb5.backedge: ; preds = %bb73.i, %bb70.i, %bb64.i
|
|
br label %bb5
|
|
|
|
bb67.i: ; preds = %bb64.i
|
|
%tmp68.i = icmp eq i32 %tmp4, 1
|
|
br i1 %tmp68.i, label %bb69.i, label %bb70.i
|
|
|
|
bb69.i: ; preds = %bb67.i
|
|
store double 0.000000e+00, double addrspace(1)* null, align 8
|
|
br label %bb70.i
|
|
|
|
bb70.i: ; preds = %bb69.i, %bb67.i
|
|
%tmp3.i.i2 = icmp eq i32 %tmp4, 0
|
|
br i1 %tmp3.i.i2, label %bb73.i, label %bb5.backedge
|
|
|
|
bb73.i: ; preds = %bb70.i
|
|
store double 0.000000e+00, double addrspace(1)* null, align 8
|
|
br label %bb5.backedge
|
|
}
|