Some opcodes in MIR are defined to be convergent by the target, by setting isConvergent in the corresponding .td file. For example, AMDGPU marks the opcodes G_SI_CALL and G_INTRINSIC* as convergent. But this is too conservative: a call to a function that does not execute any convergent operations does not need to be treated as convergent, and that information is available in LLVM IR. The new flag MIFlag::NoConvergent allows the IR translator to mark an instruction as not performing any convergent operations. It is relevant only on occurrences of opcodes that are marked isConvergent in the target.

Differential Revision: https://reviews.llvm.org/D157475
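To make the interaction concrete, here is a minimal C++ sketch (not code from this patch; the helper name isEffectivelyConvergent is invented for illustration) of how a pass could decide whether a machine instruction still has to be treated as convergent, combining the target's isConvergent bit with the new flag:

    #include "llvm/CodeGen/MachineInstr.h"

    // An instruction behaves convergently only if its opcode is marked
    // isConvergent in the target description *and* the IR translator did not
    // attach the NoConvergent MIFlag to this particular occurrence.
    static bool isEffectivelyConvergent(const llvm::MachineInstr &MI) {
      return MI.getDesc().isConvergent() &&
             !MI.getFlag(llvm::MachineInstr::NoConvergent);
    }

In printed MIR the flag appears as the "noconvergent" keyword in front of the opcode, which is what the G_SI_CALL check lines in the test below verify.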
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -stop-after=irtranslator -o - %s | FileCheck %s

declare align(8) dereferenceable(8) ptr @declared_with_ret_deref() #0
declare align(8) ptr @unknown_decl() #0
declare align(8) dereferenceable(4) ptr @declared_with_ret_deref4() #0
declare align(8) dereferenceable_or_null(8) ptr @declared_with_ret_deref_or_null() #0
declare align(8) nonnull ptr @nonnull_decl() #0
declare align(8) dereferenceable_or_null(4) ptr @declared_with_ret_deref_or_null4() #0

; Should have dereferenceable on mem operand
define i64 @load_deref_declaration_only() {
  ; CHECK-LABEL: name: load_deref_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call ptr @declared_with_ret_deref()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; No dereferenceable on mem operand
define i64 @load_deref_unknown_decl() {
  ; CHECK-LABEL: name: load_deref_unknown_decl
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call ptr @unknown_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Should have dereferenceable on mem operand
define i64 @load_deref_callsite_only() {
  ; CHECK-LABEL: name: load_deref_callsite_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call dereferenceable(8) ptr @unknown_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Both loads should have effective dereferenceable(8) since the
; maximum should be used.
define i64 @load_deref_maxmimum_callsite_declaration_only() {
  ; CHECK-LABEL: name: load_deref_maxmimum_callsite_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0)
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref4
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8
  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1)
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call0 = call dereferenceable(4) ptr @declared_with_ret_deref()
  %load0 = load i64, ptr %call0, align 8
  %call1 = call dereferenceable(8) ptr @declared_with_ret_deref4()
  %load1 = load i64, ptr %call1, align 8
  %add = add i64 %load0, %load1
  ret i64 %add
}

; Should have dereferenceable on mem operand
define i64 @load_deref_or_null_declaration_only() {
  ; CHECK-LABEL: name: load_deref_or_null_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call nonnull ptr @declared_with_ret_deref_or_null()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; No dereferenceable on mem operand
define i64 @load_deref_or_null_nonnull_decl() {
  ; CHECK-LABEL: name: load_deref_or_null_nonnull_decl
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call ptr @nonnull_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Should have dereferenceable on mem operand
define i64 @load_deref_or_null_callsite_only() {
  ; CHECK-LABEL: name: load_deref_or_null_callsite_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call dereferenceable_or_null(8) ptr @nonnull_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Both loads should have effective dereferenceable(8) since the
; maximum should be used.
define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() {
  ; CHECK-LABEL: name: load_deref_or_null_maxmimum_callsite_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0)
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null4
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
  ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT: [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8
  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1)
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call0 = call dereferenceable_or_null(4) nonnull ptr @declared_with_ret_deref_or_null()
  %load0 = load i64, ptr %call0, align 8
  %call1 = call dereferenceable_or_null(8) nonnull ptr @declared_with_ret_deref_or_null4()
  %load1 = load i64, ptr %call1, align 8
  %add = add i64 %load0, %load1
  ret i64 %add
}

attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }