Files
clang-p2996/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
Tim Renouf 2a99fa2c08 [AMDGPU] added writelane intrinsic
Summary:
For use by LLPC SPV_AMD_shader_ballot extension.

The v_writelane instruction was already implemented for use by SGPR
spilling, but I had to add an extra dummy operand tied to the
destination, to represent that all lanes except the selected one keep
the old value of the destination register.

.ll test changes were due to schedule changes caused by that new
operand.

Differential Revision: https://reviews.llvm.org/D42838

llvm-svn: 326353
2018-02-28 19:10:32 +00:00

563 lines
13 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
--- |
define amdgpu_kernel void @div_fmas() { ret void }
define amdgpu_kernel void @s_getreg() { ret void }
define amdgpu_kernel void @s_setreg() { ret void }
define amdgpu_kernel void @vmem_gt_8dw_store() { ret void }
define amdgpu_kernel void @readwrite_lane() { ret void }
define amdgpu_kernel void @rfe() { ret void }
define amdgpu_kernel void @s_mov_fed_b32() { ret void }
define amdgpu_kernel void @s_movrel() { ret void }
define amdgpu_kernel void @v_interp() { ret void }
define amdgpu_kernel void @dpp() { ret void }
define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
entry:
%A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !5, metadata !11), !dbg !12
ret void
}
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 2}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !DIExpression()
!12 = !DILocation(line: 1, column: 30, scope: !6)
...
---
# GCN-LABEL: name: div_fmas
# GCN-LABEL: bb.0:
# GCN: S_MOV_B64
# GCN-NOT: S_NOP
# GCN: V_DIV_FMAS
# GCN-LABEL: bb.1:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.2:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.3:
# GCN: V_DIV_SCALE_F32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_DIV_FMAS_F32
name: div_fmas
body: |
bb.0:
$vcc = S_MOV_B64 0
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $vcc = V_CMP_EQ_I32_e32 $vgpr1, $vgpr2, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_BRANCH %bb.2
bb.2:
$vcc = V_CMP_EQ_I32_e64 $vgpr1, $vgpr2, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_BRANCH %bb.3
bb.3:
$vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_ENDPGM
...
...
---
# GCN-LABEL: name: s_getreg
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 0
# GCN: S_NOP 0
# GCN: S_GETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG_IMM32
# GCN: S_NOP 0
# GCN: S_NOP 0
# GCN: S_GETREG
# GCN-LABEL: bb.2:
# GCN: S_SETREG
# GCN: S_NOP 0
# GCN: S_GETREG
# GCN-LABEL: bb.3:
# GCN: S_SETREG
# GCN-NEXT: S_GETREG
name: s_getreg
body: |
bb.0:
S_SETREG_B32 $sgpr0, 1
$sgpr1 = S_GETREG_B32 1
S_BRANCH %bb.1
bb.1:
S_SETREG_IMM32_B32 0, 1
$sgpr1 = S_GETREG_B32 1
S_BRANCH %bb.2
bb.2:
S_SETREG_B32 $sgpr0, 1
$sgpr1 = S_MOV_B32 0
$sgpr2 = S_GETREG_B32 1
S_BRANCH %bb.3
bb.3:
S_SETREG_B32 $sgpr0, 0
$sgpr1 = S_GETREG_B32 1
S_ENDPGM
...
...
---
# GCN-LABEL: name: s_setreg
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 0
# VI: S_NOP 0
# GCN-NEXT: S_SETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG
# GCN: S_NOP 0
# VI: S_NOP 0
# GCN-NEXT: S_SETREG
# GCN-LABEL: bb.2:
# GCN: S_SETREG
# GCN-NEXT: S_SETREG
name: s_setreg
body: |
bb.0:
S_SETREG_B32 $sgpr0, 1
S_SETREG_B32 $sgpr1, 1
S_BRANCH %bb.1
bb.1:
S_SETREG_B32 $sgpr0, 64
S_SETREG_B32 $sgpr1, 128
S_BRANCH %bb.2
bb.2:
S_SETREG_B32 $sgpr0, 1
S_SETREG_B32 $sgpr1, 0
S_ENDPGM
...
...
---
# GCN-LABEL: name: vmem_gt_8dw_store
# GCN-LABEL: bb.0:
# GCN: BUFFER_STORE_DWORD_OFFSET
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX3_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX4_OFFSET
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX4_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN-LABEL: bb.1:
# GCN: FLAT_STORE_DWORDX2
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_STORE_DWORDX3
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_STORE_DWORDX4
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_ATOMIC_CMPSWAP_X2
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
# CIVI: S_NOP
# GCN: V_MOV_B32
name: vmem_gt_8dw_store
body: |
bb.0:
BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
S_BRANCH %bb.1
bb.1:
FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
S_ENDPGM
...
...
---
# GCN-LABEL: name: readwrite_lane
# GCN-LABEL: bb.0:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_READLANE_B32
# GCN-LABEL: bb.1:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_WRITELANE_B32
# GCN-LABEL: bb.2:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_READLANE_B32
# GCN-LABEL: bb.3:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_WRITELANE_B32
name: readwrite_lane
body: |
bb.0:
$vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$sgpr4 = V_READLANE_B32 $vgpr4, $sgpr0
S_BRANCH %bb.1
bb.1:
$vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4
S_BRANCH %bb.2
bb.2:
$vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$sgpr4 = V_READLANE_B32 $vgpr4, $vcc_lo
S_BRANCH %bb.3
bb.3:
$vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$vgpr4 = V_WRITELANE_B32 $sgpr4, $vcc_lo, $vgpr4
S_ENDPGM
...
...
---
# GCN-LABEL: name: rfe
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# VI: S_NOP
# GCN-NEXT: S_RFE_B64
# GCN-LABEL: bb.1:
# GCN: S_SETREG
# GCN-NEXT: S_RFE_B64
name: rfe
body: |
bb.0:
S_SETREG_B32 $sgpr0, 3
S_RFE_B64 $sgpr2_sgpr3
S_BRANCH %bb.1
bb.1:
S_SETREG_B32 $sgpr0, 0
S_RFE_B64 $sgpr2_sgpr3
S_ENDPGM
...
...
---
# GCN-LABEL: name: s_mov_fed_b32
# GCN-LABEL: bb.0:
# GCN: S_MOV_FED_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOV_B32
# GCN-LABEL: bb.1:
# GCN: S_MOV_FED_B32
# GFX9: S_NOP
# GCN-NEXT: V_MOV_B32
name: s_mov_fed_b32
body: |
bb.0:
$sgpr0 = S_MOV_FED_B32 $sgpr0
$sgpr0 = S_MOV_B32 $sgpr0
S_BRANCH %bb.1
bb.1:
$sgpr0 = S_MOV_FED_B32 $sgpr0
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec
S_ENDPGM
...
...
---
# GCN-LABEL: name: s_movrel
# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELS_B32
# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELS_B64
# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELD_B32
# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELD_B64
name: s_movrel
body: |
bb.0:
$m0 = S_MOV_B32 0
$sgpr0 = S_MOVRELS_B32 $sgpr0, implicit $m0
S_BRANCH %bb.1
bb.1:
$m0 = S_MOV_B32 0
$sgpr0_sgpr1 = S_MOVRELS_B64 $sgpr0_sgpr1, implicit $m0
S_BRANCH %bb.2
bb.2:
$m0 = S_MOV_B32 0
$sgpr0 = S_MOVRELD_B32 $sgpr0, implicit $m0
S_BRANCH %bb.3
bb.3:
$m0 = S_MOV_B32 0
$sgpr0_sgpr1 = S_MOVRELD_B64 $sgpr0_sgpr1, implicit $m0
S_ENDPGM
...
...
---
# GCN-LABEL: name: v_interp
# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32
# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P2_F32
# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32_16bank
# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_MOV_F32
name: v_interp
body: |
bb.0:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_P2_F32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.2
bb.2:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_P1_F32_16bank $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.3
bb.3:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $m0, implicit $exec
S_ENDPGM
...
...
---
# GCN-LABEL: name: dpp
# VI-LABEL: bb.0:
# VI: V_MOV_B32_e32
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: V_MOV_B32_dpp
# VI-LABEL: bb.1:
# VI: V_CMPX_EQ_I32_e32
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: V_MOV_B32_dpp
name: dpp
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
$vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
S_ENDPGM
...
---
name: mov_fed_hazard_crash_on_dbg_value
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5' }
- { reg: '$sgpr6_sgpr7' }
- { reg: '$sgpr9' }
- { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
stack:
- { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
- { id: 1, offset: 8, size: 4, alignment: 4 }
body: |
bb.0.entry:
liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3
$flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
DBG_VALUE $noreg, 2, !5, !11, debug-location !12
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
$sgpr8 = S_MOV_B32 $sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
$sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
S_ENDPGM
...