Switch to using immediate offsets instead of the SP register to access objects on the current stack frame in chain functions. This means we no longer need to reserve a SP register just for accessing stack objects and it also allows us to set the SP (when one is actually needed) to the stack size from the very beginning. This only works if we use a FixedObject for the ScavengeFI, which is what we do for entry functions anyway (and we generally want to keep chain functions close to amdgpu_cs behaviour where we don't have a good reason to diverge).
937 lines
44 KiB
LLVM
937 lines
44 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
|
|
|
|
; External variadic function with the amdgpu_gfx calling convention. The chain
; functions in this test call it so that the expected output has to set up a
; regular call (argument registers, s32, s_swappc_b64).
declare amdgpu_gfx void @use(...)
|
|
|
|
; Chain function that receives one inreg and one regular {ptr, i32, <4 x i32>}
; argument, uses neither, and returns immediately.
; For every run configuration the expected output is only the entry s_waitcnt
; followed by s_endpgm; in particular no write to s32 is expected.
define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
|
|
; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: s_endpgm
|
|
ret void
|
|
}
|
|
|
|
; Chain function that forwards its inreg <4 x i32> and its regular <4 x i32>
; argument to the amdgpu_gfx function @use.
; Expected output in all configurations: s0-s3 are copied into v0-v3, v8-v11 are
; copied into v4-v7, and s32 is set to 0 before the s_swappc_b64 call.
; The gfx1030 runs additionally copy s[48:51] into s[0:3] before the call.
define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) {
|
|
; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
|
|
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; GISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
|
|
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; GISEL-GFX10-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
|
|
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; DAGISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
|
|
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; DAGISEL-GFX10-NEXT: s_endpgm
|
|
call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr)
|
|
ret void
|
|
}
|
|
|
|
; Chain function that forwards a <24 x i32> inreg argument and a <24 x i32>
; regular argument to @use, i.e. 48 dwords of call arguments.
; Expected output in all configurations:
;   - s0-s23 are copied into v0-v23;
;   - incoming v8-v15 end up in v24-v31 (staged through v32-v39);
;   - incoming v16-v31 are stored to scratch at byte offsets 0..60 from s32,
;     with s32 set to 0 beforehand.
; The gfx1100 runs compute each store address into s24/s25 and use
; scratch_store_b32; the gfx1030 runs use buffer_store_dword with s[48:51],
; s32 and an immediate offset, and copy s[48:51] into s[0:3] before the call.
define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 x i32> %vgprs) {
|
|
; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s32, 0
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 8
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 12
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 16
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 20
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 24
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 28
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 32
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 36
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 44
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 48
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 52
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25
|
|
; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 56
|
|
; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 60
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v31, s25
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v24, v32 :: v_dual_mov_b32 v25, v33
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v26, v34 :: v_dual_mov_b32 v27, v35
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v28, v36 :: v_dual_mov_b32 v29, v37
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v30, v38 :: v_dual_mov_b32 v31, v39
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi
|
|
; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25]
|
|
; GISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v35, v11
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v36, v12
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s32, 0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v24, v32
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v25, v33
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v26, v34
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v27, v35
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v28, v36
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v29, v37
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v30, v38
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v31, v39
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
|
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25]
|
|
; GISEL-GFX10-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 56
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 52
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 48
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 44
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 40
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 36
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 32
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 28
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 20
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 16
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 12
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 8
|
|
; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 4
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v17, s25
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v24, v39 :: v_dual_mov_b32 v25, v38
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v26, v37 :: v_dual_mov_b32 v27, v36
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v28, v35 :: v_dual_mov_b32 v29, v34
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v30, v33 :: v_dual_mov_b32 v31, v32
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25]
|
|
; DAGISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v35, v12
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v36, v11
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v24, v39
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v25, v38
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v26, v37
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v27, v36
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v28, v35
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v29, v34
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v30, v33
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v31, v32
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
|
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25]
|
|
; DAGISEL-GFX10-NEXT: s_endpgm
|
|
call amdgpu_gfx void @use(<24 x i32> %sgprs, <24 x i32> %vgprs)
|
|
ret void
|
|
}
|
|
|
|
; Chain function with a local stack object: it allocates a [3 x i32] in
; addrspace(5), stores 42 to it and passes its address to @use.
; Expected output in all configurations:
;   - the store of 42 addresses the object with an immediate offset of 4 and no
;     s32 operand (scratch_store_b32 "off offset:4" on gfx1100,
;     buffer_store_dword "s[48:51], 0 offset:4" on gfx1030);
;   - v0 is then loaded with 4 as the argument to @use;
;   - s32 is written once, to 16 on gfx1100 and to 0x200 on gfx1030, before the
;     s_swappc_b64 call.
define amdgpu_cs_chain void @alloca_and_call() {
|
|
; GISEL-GFX11-LABEL: alloca_and_call:
|
|
; GISEL-GFX11: ; %bb.0: ; %.entry
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s32, 16
|
|
; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, off offset:4
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 4
|
|
; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
; GISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX10-LABEL: alloca_and_call:
|
|
; GISEL-GFX10: ; %bb.0: ; %.entry
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
|
|
; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200
|
|
; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; GISEL-GFX10-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX11-LABEL: alloca_and_call:
|
|
; DAGISEL-GFX11: ; %bb.0: ; %.entry
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, off offset:4
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 4
|
|
; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
; DAGISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX10-LABEL: alloca_and_call:
|
|
; DAGISEL-GFX10: ; %bb.0: ; %.entry
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
|
|
; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200
|
|
; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
|
; DAGISEL-GFX10-NEXT: s_endpgm
|
|
.entry:
|
|
%v = alloca [3 x i32], addrspace(5)
|
|
store i32 42, ptr addrspace(5) %v
|
|
call amdgpu_gfx void @use(ptr addrspace(5) %v)
|
|
ret void
|
|
}
|
|
|
|
; amdgpu_cs entry function that hands control to @chain_callee through the
; llvm.amdgcn.cs.chain intrinsic, forwarding %a as inreg and %b as regular
; arguments. The inline asm before the call clobbers v0, v8, v16 and s0.
; (@chain_callee and the intrinsic are presumably declared elsewhere in this
; file; confirm.)
; Expected output in all configurations:
;   - s0 is parked in s3 across the asm and copied back to s0 afterwards;
;   - incoming v0 is parked in v3 and ends up in v8, v1 ends up in v9 and v2
;     in v10;
;   - the function ends with "s_mov_b32 exec_lo, -1" followed by
;     "s_setpc_b64 s[4:5]" (no s_endpgm).
; The gfx1030 runs additionally build a descriptor in s[100:103]
; (s_getpc_b64 / s_load_dwordx4 / s_bitset0_b32 / s_add_u32 / s_addc_u32) and
; copy it into s[48:51] before the jump.
define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: cs_to_chain:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: cs_to_chain:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
|
|
; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
|
|
; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: cs_to_chain:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: cs_to_chain:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
|
|
; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
|
|
; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
|
|
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Same call pattern as a chain call from an entry function, but issued from an
; amdgpu_cs_chain function: the inline asm clobbers v0, v8, v16 and s0, then
; llvm.amdgcn.cs.chain forwards %a (inreg) and %b to @chain_callee.
; Expected output is the same instruction sequence for both targets (only the
; order of the two chain_callee address moves differs between the global-isel
; and SelectionDAG runs):
;   - incoming v8 is parked in v1 and s0 in s3 across the asm, then both are
;     copied back to v8 and s0;
;   - the function ends with "s_mov_b32 exec_lo, -1" followed by
;     "s_setpc_b64 s[4:5]";
;   - no write to s32 and no access to s[48:51] is expected.
define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: chain_to_chain:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: chain_to_chain:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: chain_to_chain:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: chain_to_chain:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
|
|
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Chain-to-chain call whose first VGPR argument element is produced through
; llvm.amdgcn.set.inactive(3, 4) and llvm.amdgcn.wwm: the result replaces
; element 0 of %b, and the updated vector %c is forwarded to @chain_callee
; together with %a (inreg). The inline asm between set.inactive and wwm clobbers
; v0, v8, v16 and s0.
; Expected output in all configurations:
;   - v1 is written with 3, exec_lo is inverted (s_not_b32), v1 is written with
;     4, and exec_lo is inverted back;
;   - v1 is copied to v2 before the asm and v2 is copied into v8 afterwards;
;   - s0 is parked in s3 across the asm and copied back;
;   - the function ends with "s_mov_b32 exec_lo, -1" followed by
;     "s_setpc_b64 s[4:5]".
define amdgpu_cs_chain void @chain_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: chain_to_chain_wwm:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 3
|
|
; GISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 4
|
|
; GISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: chain_to_chain_wwm:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 3
|
|
; GISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 4
|
|
; GISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: chain_to_chain_wwm:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, 3
|
|
; DAGISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, 4
|
|
; DAGISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: chain_to_chain_wwm:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, 3
|
|
; DAGISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, 4
|
|
; DAGISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
%i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
|
|
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
|
|
%w = call i32 @llvm.amdgcn.wwm(i32 %i)
|
|
%c = insertelement <3 x i32> %b, i32 %w, i32 0
|
|
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Chain call in which inline asm clobbers v0 through v8, v16 and s0 before
; control is handed to @chain_callee with the incoming %a (inreg) and %b
; forwarded unchanged.
; All four run configurations expect the same shape: v8 is copied to v11 and
; s0 to s3 ahead of the asm block, both are copied back afterwards, exec_lo is
; written with -1 (presumably from the i32 -1 operand of the intrinsic call),
; and the function ends in s_setpc_b64 to the callee address built in s[4:5].
; NOTE(review): presumably %a arrives in s0..s2 and %b in v8..v10, which would
; explain why only s0 and v8 need preserving - confirm against the calling
; convention documentation.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing them by hand.
define amdgpu_cs_chain void @chain_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
; The clobber list covers the whole v0-v7 range plus v8, so the copy of v8
; that survives the asm has to live in a register outside that range.
call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
|
|
call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Chain call that forwards fewer values than the caller received: the two
; shufflevectors keep only elements 0 and 1 of %a and %b, and the resulting
; <2 x i32> values are passed to @chain_callee_2 through the v2i32 overload of
; the chain intrinsic.
; The inline asm clobbers v0, v8, v16 and s0 first. In every run configuration
; the expected code parks v8 in v1 and s0 in s2 across the asm block, restores
; both, writes -1 to exec_lo and jumps with s_setpc_b64 s[4:5].
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing them by hand.
define amdgpu_cs_chain void @chain_to_chain_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: chain_to_chain_fewer_args:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s2, s0
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s2
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: chain_to_chain_fewer_args:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s2, s0
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s2
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: chain_to_chain_fewer_args:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: chain_to_chain_fewer_args:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
; Narrow both incoming vectors to their first two elements.
%s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
|
|
%v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
|
|
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
|
|
call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Chain call that forwards more values than the caller received: both
; shufflevectors widen the <3 x i32> arguments to <4 x i32>, with mask index 3
; selecting element 0 of the zeroinitializer operand, so the extra lane is 0
; (visible below as "s_mov_b32 s3, 0" and the move of 0 into v11).
; The jump target is @chain_callee_4, whose declared signature
; (<4 x i32> inreg, <4 x i32>) matches the values passed through the v4i32
; overload of the chain intrinsic.
; The inline asm clobbers v0, v8, v16 and s0 first. In every run configuration
; the expected code parks v8 in v1 and s0 in s3 across the asm block, restores
; both, writes -1 to exec_lo and jumps with s_setpc_b64 s[4:5].
; NOTE(review): only the callee symbol in the assertion lines was changed to
; follow the retargeted call; re-run utils/update_llc_test_checks.py to confirm
; that nothing else in the output moves.
define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i32> %b) {
|
|
; GISEL-GFX11-LABEL: chain_to_chain_more_args:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX11-NEXT: s_nop
|
|
; GISEL-GFX11-NEXT: ;;#ASMEND
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, 0
|
|
; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; GISEL-GFX10-LABEL: chain_to_chain_more_args:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; GISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; GISEL-GFX10-NEXT: s_nop
|
|
; GISEL-GFX10-NEXT: ;;#ASMEND
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; GISEL-GFX10-NEXT: s_mov_b32 s3, 0
|
|
; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX11-LABEL: chain_to_chain_more_args:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX11-NEXT: s_nop
|
|
; DAGISEL-GFX11-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 s3, 0
|
|
; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
|
|
;
|
|
; DAGISEL-GFX10-LABEL: chain_to_chain_more_args:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMSTART
|
|
; DAGISEL-GFX10-NEXT: s_nop
|
|
; DAGISEL-GFX10-NEXT: ;;#ASMEND
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_4@abs32@hi
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_4@abs32@lo
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 s3, 0
|
|
; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
|
|
; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
|
|
; Widen both incoming vectors to four elements; lane 3 comes from the zero
; vector.
%s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
|
|
; Jump to the callee whose parameter types match the <4 x i32> values passed.
call void(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v4i32(ptr @chain_callee_4, i32 -1, <4 x i32> inreg %s, <4 x i32> %v, i32 0)
|
|
unreachable
|
|
}
|
|
|
|
; Stores a volatile <4 x i32> <1, 2, 3, 4> into element %idx of a
; 32-byte-aligned addrspace(5) alloca holding eight <4 x i32> values, then
; returns.
; In every run configuration the expected code forms the store address as
; (v8 << 4) + 32 and goes straight to the store(s): the gfx1100 runs use one
; scratch_store_b128, the gfx1030 runs use four buffer_store_dword through
; s[48:51]. No instruction in the checked sequences masks or adjusts a stack
; pointer.
; NOTE(review): the absence of a realignment sequence is presumably what the
; function name refers to - confirm against the commit that introduced it.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing them by hand.
define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
|
|
; GISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
|
|
; GISEL-GFX11: ; %bb.0:
|
|
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s3, 4
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s2, 3
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s1, 2
|
|
; GISEL-GFX11-NEXT: s_mov_b32 s0, 1
|
|
; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8
|
|
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GISEL-GFX11-NEXT: v_add_nc_u32_e32 v4, 32, v0
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
|
|
; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
|
|
; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
|
|
; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
|
|
; GISEL-GFX10: ; %bb.0:
|
|
; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3
|
|
; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4
|
|
; GISEL-GFX10-NEXT: v_add_nc_u32_e32 v0, 32, v0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
|
|
; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GISEL-GFX10-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
|
|
; DAGISEL-GFX11: ; %bb.0:
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
|
|
; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
|
|
; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, 32
|
|
; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc
|
|
; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX11-NEXT: s_endpgm
|
|
;
|
|
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
|
|
; DAGISEL-GFX10: ; %bb.0:
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4
|
|
; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, 32
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2
|
|
; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen
|
|
; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; DAGISEL-GFX10-NEXT: s_endpgm
|
|
; The alloca requests 32-byte alignment, and the store through it is volatile
; and indexed by the runtime value %idx.
%alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
|
|
%gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
|
|
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
|
|
ret void
|
|
}
|
|
|
|
; Overloads of the chain intrinsic for 2-, 3- and 4-element i32 vectors. Each
; takes a callee pointer, an i32, one vector passed inreg at the call sites
; above, one vector passed normally, a trailing i32, and optional varargs.
declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
|
|
declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
|
|
declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...)
|
|
; External amdgpu_cs_chain functions, one per vector width (2, 3 and 4
; elements), each taking an inreg vector followed by a regular vector.
declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>)
|
|
declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
|
|
declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>)
|
|
; Intrinsics called by chain_to_chain_wwm.
declare i32 @llvm.amdgcn.set.inactive(i32, i32)
|
|
declare i32 @llvm.amdgcn.wwm(i32)
|