This allows us to emit wide generic and scratch memory accesses when we do not have alignment information. In cases where accesses happen to be properly aligned or where generic accesses do not go to scratch memory, this improves performance of the generated code by a factor of up to 16x and reduces code size, especially when lowering memcpy and memmove intrinsics. Also: Make the use of the FeatureUnalignedScratchAccess feature more consistent: FeatureUnalignedScratchAccess and EnableFlatScratch are now orthogonal, whereas previously the code assumed in some places that the latter implies the former. Part of SWDEV-455845.
4681 lines
229 KiB
LLVM
4681 lines
229 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s
|
|
|
|
; Testing codegen for memcpy with vector operands for all combinations of the following parameters:
|
|
; destination address space: 0, 1, 3, 5
|
|
; source address space: 0, 1, 3, 4, 5
|
|
; alignment: 1, 2, 8, 16
|
|
; sizes: 16, 31, 32
|
|
|
|
|
|
; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 1.
; The checks expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 1.
; The checks expect the copy to be split into flat dwordx4 (offset 0),
; dwordx3 (offset 16), short (offset 28) and byte (offset 30) accesses.
define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30
|
|
; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28
|
|
; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 1.
; The checks expect two flat dwordx4 load/store pairs at offsets 0 and 16.
define void @memcpy_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 2.
; The checks expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 2.
; The checks expect the copy to be split into flat dwordx4 (offset 0),
; dwordx3 (offset 16), short (offset 28) and byte (offset 30) accesses.
define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30
|
|
; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28
|
|
; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 2.
; The checks expect two flat dwordx4 load/store pairs at offsets 0 and 16.
define void @memcpy_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 8.
; The checks expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 8.
; The checks expect two flat dwordx4 load/store pairs at offsets 0 and 15,
; i.e. two 16-byte accesses that overlap by one byte to cover 31 bytes.
define void @memcpy_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 8.
; The checks expect two flat dwordx4 load/store pairs at offsets 0 and 16.
define void @memcpy_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 16.
; The checks expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 16.
; The checks expect two flat dwordx4 load/store pairs at offsets 0 and 15,
; i.e. two 16-byte accesses that overlap by one byte to cover 31 bytes.
define void @memcpy_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 16.
; The checks expect two flat dwordx4 load/store pairs at offsets 0 and 16.
define void @memcpy_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p0_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 1.
; The checks expect one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 1.
; The checks expect global loads and flat stores split into dwordx4 (offset 0),
; dwordx3 (offset 16), short (offset 28) and byte (offset 30) accesses.
define void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30
|
|
; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28
|
|
; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 1.
; The checks expect two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 16.
define void @memcpy_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 2.
; The checks expect one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 2.
; The checks expect global loads and flat stores split into dwordx4 (offset 0),
; dwordx3 (offset 16), short (offset 28) and byte (offset 30) accesses.
define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30
|
|
; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28
|
|
; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 2.
; The checks expect two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 16.
define void @memcpy_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 8.
; The checks expect one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 8.
; The checks expect two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 15, i.e. two 16-byte accesses that overlap by one byte.
define void @memcpy_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 8.
; The checks expect two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 16.
define void @memcpy_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 16.
; The checks expect one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 16.
; The checks expect two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 15, i.e. two 16-byte accesses that overlap by one byte.
define void @memcpy_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 16.
; The checks expect two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 16.
define void @memcpy_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p1_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 1.
; The checks expect one ds_read2_b64 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 1.
; The checks expect ds_read loads (u8 at 30, u16 at 28, b32 at 24, b64 at 16,
; read2_b64 at 0) feeding flat stores of byte (30), short (28), dwordx3 (16)
; and dwordx4 (0).
define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_u8 v9, v2 offset:30
|
|
; CHECK-NEXT: ds_read_b32 v8, v2 offset:24
|
|
; CHECK-NEXT: ds_read_u16 v10, v2 offset:28
|
|
; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 1.
; The checks expect two ds_read2_b64 loads feeding two flat_store_dwordx4
; stores at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
|
|
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 2.
; The checks expect one ds_read2_b64 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 2.
; The checks expect ds_read loads (u8 at 30, u16 at 28, b32 at 24, b64 at 16,
; read2_b64 at 0) feeding flat stores of byte (30), short (28), dwordx3 (16)
; and dwordx4 (0).
define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_u8 v9, v2 offset:30
|
|
; CHECK-NEXT: ds_read_b32 v8, v2 offset:24
|
|
; CHECK-NEXT: ds_read_u16 v10, v2 offset:28
|
|
; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 2.
; The checks expect two ds_read2_b64 loads feeding two flat_store_dwordx4
; stores at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
|
|
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 8.
; The checks expect one ds_read2_b64 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 8.
; The checks expect a ds_read_b128 at offset 15 and a ds_read2_b64 at offset 0
; feeding two flat_store_dwordx4 stores at offsets 15 and 0 (the two 16-byte
; ranges overlap by one byte).
define void @memcpy_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15
|
|
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 8.
; The checks expect two ds_read2_b64 loads feeding two flat_store_dwordx4
; stores at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
|
|
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 16.
; The checks expect one ds_read_b128 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[2:5], v2
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 16.
; The checks expect two ds_read_b128 loads at offsets 15 and 0 feeding two
; flat_store_dwordx4 stores at offsets 15 and 0 (the two 16-byte ranges
; overlap by one byte).
define void @memcpy_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 16.
; The checks expect two ds_read_b128 loads at offsets 16 and 0 feeding two
; flat_store_dwordx4 stores at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p3_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(0); both pointers are align 1.
; The checks expect two sequential global_load_dwordx2 / flat_store_dwordx2
; pairs at offsets 0 and 8, each load waited on before its store.
define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 1.
; Expected output: load/store pairs of decreasing width with no overlap:
; dwordx2 at offsets 0, 8 and 16, dword at 24, short at 28 and byte at 30.
define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
|
|
; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24
|
|
; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28
|
|
; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 1.
; Expected output: four global_load_dwordx2 / flat_store_dwordx2 pairs at
; offsets 0, 8, 16 and 24.
define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 2.
; Expected output: two global_load_dwordx2 / flat_store_dwordx2 pairs at
; offsets 0 and 8.
define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 2.
; Expected output: load/store pairs of decreasing width with no overlap:
; dwordx2 at offsets 0, 8 and 16, dword at 24, short at 28 and byte at 30.
define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
|
|
; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24
|
|
; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28
|
|
; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 2.
; Expected output: four global_load_dwordx2 / flat_store_dwordx2 pairs at
; offsets 0, 8, 16 and 24.
define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
|
|
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 8.
; Expected output: a single global_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 8.
; Expected output: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 15, i.e. the second access overlaps the first by one byte.
define void @memcpy_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 8.
; Expected output: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 16.
define void @memcpy_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 16.
; Expected output: a single global_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 16.
; Expected output: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 15, i.e. the second access overlaps the first by one byte.
define void @memcpy_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(4) to addrspace(0); both pointers are align 16.
; Expected output: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 0 and 16.
define void @memcpy_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p4_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 1.
; Expected output: four buffer_load_dword at offsets 0, 4, 8 and 12 in one
; clause, then a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 1.
; Expected output: nine buffer loads in one clause (seven dword, one ushort at
; offset 28, one ubyte at offset 30), stored with flat_store_short,
; flat_store_byte, flat_store_dwordx3 (offset 16) and flat_store_dwordx4 (offset 0).
define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x8
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 1.
; Expected output: eight buffer_load_dword in one clause (offsets 16..28, then
; 0..12), followed by two flat_store_dwordx4 at offsets 16 and 0.
define void @memcpy_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 2.
; Expected output: four buffer_load_dword at offsets 0, 4, 8 and 12 in one
; clause, then a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 2.
; Expected output: nine buffer loads in one clause (seven dword, one ushort at
; offset 28, one ubyte at offset 30), stored with flat_store_short,
; flat_store_byte, flat_store_dwordx3 (offset 16) and flat_store_dwordx4 (offset 0).
define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x8
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30
|
|
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 2.
; Expected output: eight buffer_load_dword in one clause (offsets 16..28, then
; 0..12), followed by two flat_store_dwordx4 at offsets 16 and 0.
define void @memcpy_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 8.
; Expected output: four buffer_load_dword at offsets 0, 4, 8 and 12 in one
; clause, then a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 8.
; Expected output: eight buffer_load_dword in one clause at offsets 0, 4, 8, 12
; and 15, 19, 23, 27, then two flat_store_dwordx4 at offsets 0 and 15 (the
; second group overlaps the first by one byte).
define void @memcpy_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 8.
; Expected output: eight buffer_load_dword in one clause at offsets 0..28,
; then two flat_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 16.
; Expected output: four buffer_load_dword at offsets 0, 4, 8 and 12 in one
; clause, then a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 16.
; Expected output: eight buffer_load_dword in one clause at offsets 0, 4, 8, 12
; and 15, 19, 23, 27, then two flat_store_dwordx4 at offsets 0 and 15 (the
; second group overlaps the first by one byte).
define void @memcpy_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(5) to addrspace(0); both pointers are align 16.
; Expected output: eight buffer_load_dword in one clause at offsets 0..28,
; then two flat_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p0_p5_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 1.
; Expected output: a single flat_load_dwordx4 / global_store_dwordx4 pair.
define void @memcpy_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 1.
; Expected output: flat loads in one clause (dwordx2 at offsets 23 and 16,
; dwordx4 at offset 0) and matching global stores; the dwordx2 at offset 23
; overlaps the one at offset 16 by one byte.
define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x2
|
|
; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23
|
|
; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 1.
; Expected output: two flat_load_dwordx4 in one clause (offsets 16 and 0),
; then two global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 2.
; Expected output: a single flat_load_dwordx4 / global_store_dwordx4 pair.
define void @memcpy_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 2.
; Expected output: flat loads in one clause (dwordx2 at offsets 23 and 16,
; dwordx4 at offset 0) and matching global stores; the dwordx2 at offset 23
; overlaps the one at offset 16 by one byte.
define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x2
|
|
; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23
|
|
; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 32-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 2.
; Expected output: two flat_load_dwordx4 in one clause (offsets 16 and 0),
; then two global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 16-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 8.
; Expected output: a single flat_load_dwordx4 / global_store_dwordx4 pair.
define void @memcpy_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; 31-byte memcpy from addrspace(0) to addrspace(1); both pointers are align 8.
; Expected output: two flat_load_dwordx4 in one clause (offsets 15 and 0) and
; two global_store_dwordx4 at the same offsets; the access at offset 15
; overlaps the one at offset 0 by one byte.
define void @memcpy_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(0) to addrspace(1); both pointers are declared align 8.
; The assertions below expect two flat_load_dwordx4 (offsets 16 and 0) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(0) to addrspace(1); both pointers are declared align 16.
; The assertions below expect one flat_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(0) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two dwordx4 accesses per side at offsets 0 and 15; they
; overlap by one byte so that two 16-byte accesses cover exactly 31 bytes.
define void @memcpy_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(0) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two flat_load_dwordx4 (offsets 16 and 0) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p0_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(1); both pointers are declared align 1.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(1); both pointers are declared align 1.
; The assertions below expect three pieces: dwordx4 at offset 0, dwordx2 at offset 16
; and dwordx2 at offset 23. The piece at 23 overlaps the previous one by one byte so
; the total is 31 bytes.
define void @memcpy_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x2
|
|
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(1); both pointers are declared align 1.
; The assertions below expect two global_load_dwordx4 (offsets 16 and 0) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(1); both pointers are declared align 2.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(1); both pointers are declared align 2.
; The assertions below expect three pieces: dwordx4 at offset 0, dwordx2 at offset 16
; and dwordx2 at offset 23. The piece at 23 overlaps the previous one by one byte so
; the total is 31 bytes.
define void @memcpy_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x2
|
|
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(1); both pointers are declared align 2.
; The assertions below expect two global_load_dwordx4 (offsets 16 and 0) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(1); both pointers are declared align 8.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(1); both pointers are declared align 8.
; The assertions below expect two dwordx4 accesses per side at offsets 0 and 15; they
; overlap by one byte so that two 16-byte accesses cover exactly 31 bytes.
define void @memcpy_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(1); both pointers are declared align 8.
; The assertions below expect two global_load_dwordx4 (offsets 16 and 0) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(1) to addrspace(1); both pointers are declared align 16.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(1) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two dwordx4 accesses per side at offsets 0 and 15; they
; overlap by one byte so that two 16-byte accesses cover exactly 31 bytes.
define void @memcpy_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two global_load_dwordx4 (offsets 16 and 0) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p1_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(1); both pointers are declared align 1.
; The assertions below expect one ds_read_b128 feeding one global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[2:5], v2
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(1); both pointers are declared align 1.
; The assertions below expect three pieces: ds_read_b64 at offset 0, ds_read_b128 at
; offset 8 and ds_read_b64 at offset 23, each stored at the same offset. The piece at
; 23 overlaps the previous one by one byte so the total is 31 bytes.
define void @memcpy_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b64 v[7:8], v2
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8
|
|
; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(1); both pointers are declared align 1.
; The assertions below expect two ds_read_b128 (offsets 0 and 16) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(1); both pointers are declared align 2.
; The assertions below expect one ds_read_b128 feeding one global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[2:5], v2
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(1); both pointers are declared align 2.
; The assertions below expect three pieces: ds_read_b64 at offset 0, ds_read_b128 at
; offset 8 and ds_read_b64 at offset 23, each stored at the same offset. The piece at
; 23 overlaps the previous one by one byte so the total is 31 bytes.
define void @memcpy_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b64 v[7:8], v2
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8
|
|
; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(1); both pointers are declared align 2.
; The assertions below expect two ds_read_b128 (offsets 0 and 16) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(1); both pointers are declared align 8.
; The assertions below expect one ds_read2_b64 (offset1:1) feeding one
; global_store_dwordx4, rather than the ds_read_b128 expected for the other alignments.
define void @memcpy_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(1); both pointers are declared align 8.
; The assertions below expect a ds_read2_b64 (offset1:1) for the first 16 bytes and a
; ds_read_b128 at offset 15 for the rest, stored with global_store_dwordx4 at offsets
; 0 and 15 (the two stores overlap by one byte).
define void @memcpy_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(1); both pointers are declared align 8.
; The assertions below expect two ds_read2_b64 (offset1:1, then offset0:2 offset1:3)
; and two global_store_dwordx4 at byte offsets 0 and 16.
define void @memcpy_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1
|
|
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(1); both pointers are declared align 16.
; The assertions below expect one ds_read_b128 feeding one global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[2:5], v2
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two ds_read_b128 at offsets 0 and 15 and two
; global_store_dwordx4 at the same offsets; the two 16-byte accesses overlap by one
; byte so that they cover exactly 31 bytes.
define void @memcpy_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two ds_read_b128 (offsets 0 and 16) and two
; global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p3_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[3:6], v2
|
|
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are declared align 1.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are declared align 1.
; The assertions below expect three pieces: dwordx2 at offset 0, dwordx4 at offset 8
; and dwordx2 at offset 23 (overlapping the previous piece by one byte). Note that the
; load for offset 23 is expected only after the first two stores have been issued.
define void @memcpy_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are declared align 1.
; The assertions below expect two global_load_dwordx4 (offsets 0 and 16) issued
; together, followed by two global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are declared align 2.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are declared align 2.
; The assertions below expect three pieces: dwordx2 at offset 0, dwordx4 at offset 8
; and dwordx2 at offset 23 (overlapping the previous piece by one byte). Note that the
; load for offset 23 is expected only after the first two stores have been issued.
define void @memcpy_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8
|
|
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are declared align 2.
; The assertions below expect two global_load_dwordx4 (offsets 0 and 16) issued
; together, followed by two global_store_dwordx4 at the same offsets.
define void @memcpy_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are declared align 8.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are declared align 8.
; The assertions below expect two dwordx4 load/store pairs at offsets 0 and 15
; (overlapping by one byte). The pairs are expected strictly in sequence: the second
; load appears only after the first store.
define void @memcpy_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are declared align 8.
; The assertions below expect two dwordx4 load/store pairs at offsets 0 and 16. The
; pairs are expected strictly in sequence: the second load appears only after the
; first store.
define void @memcpy_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are declared align 16.
; The assertions below expect one global_load_dwordx4 feeding one global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two dwordx4 load/store pairs at offsets 0 and 15
; (overlapping by one byte). The pairs are expected strictly in sequence: the second
; load appears only after the first store.
define void @memcpy_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are declared align 16.
; The assertions below expect two dwordx4 load/store pairs at offsets 0 and 16. The
; pairs are expected strictly in sequence: the second load appears only after the
; first store.
define void @memcpy_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p4_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are declared align 1.
; The assertions below expect the source to be read with four buffer_load_dword
; (offsets 0, 4, 8, 12) whose results are written by a single global_store_dwordx4.
define void @memcpy_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p1_p5_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers align 1.
; Expected lowering: eight buffer_load_dword (offsets 0-20, 23, 27) feeding
; global stores dwordx4 @0, dwordx2 @16 and dwordx2 @23.
define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers align 1.
; Expected lowering: eight buffer_load_dword (offsets 0-28) feeding two
; global_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(5) to addrspace(1); both pointers align 2.
; Expected lowering: four buffer_load_dword (offsets 0-12) feeding one
; global_store_dwordx4.
define void @memcpy_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers align 2.
; Expected lowering: eight buffer_load_dword (offsets 0-20, 23, 27) feeding
; global stores dwordx4 @0, dwordx2 @16 and dwordx2 @23.
define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers align 2.
; Expected lowering: eight buffer_load_dword (offsets 0-28) feeding two
; global_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(5) to addrspace(1); both pointers align 8.
; Expected lowering: four buffer_load_dword (offsets 0-12) feeding one
; global_store_dwordx4.
define void @memcpy_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers align 8.
; Expected lowering: eight buffer_load_dword (offsets 0-12 and 15-27) feeding
; two global_store_dwordx4 at offsets 0 and 15.
define void @memcpy_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers align 8.
; Expected lowering: eight buffer_load_dword (offsets 0-28) feeding two
; global_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(5) to addrspace(1); both pointers align 16.
; Expected lowering: four buffer_load_dword (offsets 0-12) feeding one
; global_store_dwordx4.
define void @memcpy_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers align 16.
; Expected lowering: eight buffer_load_dword (offsets 0-12 and 15-27) feeding
; two global_store_dwordx4 at offsets 0 and 15.
define void @memcpy_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(5) to addrspace(1); both pointers align 16.
; Expected lowering: eight buffer_load_dword (offsets 0-28) feeding two
; global_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers align 1.
; Expected lowering: one flat_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers align 1.
; Expected lowering: flat loads dwordx2 @23, dwordx2 @16 and dwordx4 @0, written
; back with ds_write_b64 @23, ds_write_b64 @16 and one ds_write2_b64.
define void @memcpy_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23
; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers align 1.
; Expected lowering: two flat_load_dwordx4 (offsets 16 and 0), each written
; back with a ds_write2_b64.
define void @memcpy_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers align 2.
; Expected lowering: one flat_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers align 2.
; Expected lowering: flat loads dwordx2 @23, dwordx2 @16 and dwordx4 @0, written
; back with ds_write_b64 @23, ds_write_b64 @16 and one ds_write2_b64.
define void @memcpy_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23
; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers align 2.
; Expected lowering: two flat_load_dwordx4 (offsets 16 and 0), each written
; back with a ds_write2_b64.
define void @memcpy_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers align 8.
; Expected lowering: one flat_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers align 8.
; Expected lowering: two flat_load_dwordx4 (offsets 0 and 15), written back
; with one ds_write2_b64 and one ds_write_b128 at offset 15.
define void @memcpy_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2]
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers align 8.
; Expected lowering: two flat_load_dwordx4 (offsets 16 and 0), each written
; back with a ds_write2_b64.
define void @memcpy_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(3); both pointers align 16.
; Expected lowering: one flat_load_dwordx4 followed by one ds_write_b128.
define void @memcpy_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(3); both pointers align 16.
; Expected lowering: two flat_load_dwordx4 (offsets 15 and 0), each written
; back with a ds_write_b128 at the same offset.
define void @memcpy_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(3); both pointers align 16.
; Expected lowering: two flat_load_dwordx4 (offsets 16 and 0), each written
; back with a ds_write_b128 at the same offset.
define void @memcpy_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers align 1.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers align 1.
; Expected lowering: global loads dwordx4 @0, dwordx2 @16 and dwordx2 @23,
; written back with one ds_write2_b64, ds_write_b64 @16 and ds_write_b64 @23.
define void @memcpy_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers align 1.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with a ds_write2_b64.
define void @memcpy_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers align 2.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers align 2.
; Expected lowering: global loads dwordx4 @0, dwordx2 @16 and dwordx2 @23,
; written back with one ds_write2_b64, ds_write_b64 @16 and ds_write_b64 @23.
define void @memcpy_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers align 2.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with a ds_write2_b64.
define void @memcpy_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers align 8.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers align 8.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 15), written back
; with one ds_write2_b64 and one ds_write_b128 at offset 15.
define void @memcpy_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(3); both pointers align 8.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with a ds_write2_b64.
define void @memcpy_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(3); both pointers align 16.
; Expected lowering: one global_load_dwordx4 followed by one ds_write_b128.
define void @memcpy_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(3); both pointers align 16.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 15), each written
; back with a ds_write_b128 at the same offset.
define void @memcpy_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; Copies 32 bytes from addrspace(1) to addrspace(3); both pointers are align 16.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with one ds_write_b128.
define void @memcpy_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p1_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(3); both pointers are align 1.
; Expected lowering: one ds_read2_b64 followed by one ds_write2_b64.
define void @memcpy_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(3); both pointers are align 1.
; Expected lowering: one ds_read2_b64 plus two ds_read_b64 at offsets 16 and
; 23, mirrored by the matching writes. The 8-byte access at offset 23 overlaps
; the one at offset 16 by one byte so that the copy ends at byte 30.
define void @memcpy_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23
|
|
; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16
|
|
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(3); both pointers are align 1.
; Expected lowering: two ds_read2_b64 followed by two ds_write2_b64.
define void @memcpy_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
|
|
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(3); both pointers are align 2.
; Expected lowering: one ds_read2_b64 followed by one ds_write2_b64.
define void @memcpy_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(3); both pointers are align 2.
; Expected lowering: one ds_read2_b64 plus two ds_read_b64 at offsets 16 and
; 23, mirrored by the matching writes. The 8-byte access at offset 23 overlaps
; the one at offset 16 by one byte so that the copy ends at byte 30.
define void @memcpy_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23
|
|
; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16
|
|
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(3); both pointers are align 2.
; Expected lowering: two ds_read2_b64 followed by two ds_write2_b64.
define void @memcpy_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
|
|
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(3); both pointers are align 8.
; Expected lowering: one ds_read2_b64 followed by one ds_write2_b64.
define void @memcpy_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(3); both pointers are align 8.
; Expected lowering: a ds_read2_b64 at the base plus a ds_read_b128 at offset
; 15, mirrored by ds_write2_b64 and ds_write_b128. The access at offset 15
; overlaps the first 16 bytes by one byte so that exactly 31 bytes are covered.
define void @memcpy_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
|
|
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(3); both pointers are align 8.
; Expected lowering: two ds_read2_b64 followed by two ds_write2_b64.
define void @memcpy_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
|
|
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(3) to addrspace(3); both pointers are align 16.
; Expected lowering: one ds_read_b128 followed by one ds_write_b128.
define void @memcpy_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[1:4], v1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(3) to addrspace(3); both pointers are align 16.
; Expected lowering: two ds_read_b128 / ds_write_b128 pairs at offsets 15 and
; 0. The pair at offset 15 overlaps the other by one byte so that exactly 31
; bytes are covered.
define void @memcpy_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:15
|
|
; CHECK-NEXT: ds_read_b128 v[6:9], v1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[6:9]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(3) to addrspace(3); both pointers are align 16.
; Expected lowering: two ds_read_b128 / ds_write_b128 pairs at offsets 16 and 0.
define void @memcpy_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p3_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:16
|
|
; CHECK-NEXT: ds_read_b128 v[6:9], v1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[6:9]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(3); both pointers are align 1.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(3); both pointers are align 1.
; Expected lowering: a global_load_dwordx4 at offset 0 plus two
; global_load_dwordx2 at offsets 16 and 23, stored with ds_write2_b64 and two
; ds_write_b64. The 8-byte access at offset 23 overlaps the one at offset 16
; by one byte so that the copy ends at byte 30.
define void @memcpy_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x2
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(3); both pointers are align 1.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with one ds_write2_b64.
define void @memcpy_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(3); both pointers are align 2.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(3); both pointers are align 2.
; Expected lowering: a global_load_dwordx4 at offset 0 plus two
; global_load_dwordx2 at offsets 16 and 23, stored with ds_write2_b64 and two
; ds_write_b64. The 8-byte access at offset 23 overlaps the one at offset 16
; by one byte so that the copy ends at byte 30.
define void @memcpy_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x2
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
|
|
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(3); both pointers are align 2.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with one ds_write2_b64.
define void @memcpy_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(3); both pointers are align 8.
; Expected lowering: one global_load_dwordx4 followed by one ds_write2_b64.
define void @memcpy_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(3); both pointers are align 8.
; Expected lowering: two global_load_dwordx4 at offsets 0 and 15, stored with
; ds_write2_b64 and ds_write_b128 offset:15. The access at offset 15 overlaps
; the first 16 bytes by one byte so that exactly 31 bytes are covered.
define void @memcpy_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(3); both pointers are align 8.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with one ds_write2_b64.
define void @memcpy_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(4) to addrspace(3); both pointers are align 16.
; Expected lowering: one global_load_dwordx4 followed by one ds_write_b128.
define void @memcpy_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(4) to addrspace(3); both pointers are align 16.
; Expected lowering: two 16-byte load/store pairs at offsets 0 and 15. The
; second pair overlaps the first by one byte so that exactly 31 bytes are
; covered.
define void @memcpy_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(4) to addrspace(3); both pointers are align 16.
; Expected lowering: two global_load_dwordx4 (offsets 0 and 16), each written
; back with one ds_write_b128.
define void @memcpy_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p4_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(5) to addrspace(3); both pointers are align 1.
; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) followed by
; one ds_write2_b64.
define void @memcpy_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(5) to addrspace(3); both pointers are align 1.
; Expected lowering: eight buffer_load_dword at offsets 0, 4, 8, 12, 16, 20,
; 23 and 27, stored with ds_write2_b64 plus ds_write_b64 at offsets 16 and 23.
; The loads at 23/27 overlap the one at 20 by one byte so that the copy ends
; at byte 30.
define void @memcpy_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(5) to addrspace(3); both pointers are align 1.
; Expected lowering: eight buffer_load_dword at offsets 0 through 28 in steps
; of 4, stored with two ds_write2_b64.
define void @memcpy_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(5) to addrspace(3); both pointers are align 2.
; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) followed by
; one ds_write2_b64.
define void @memcpy_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(5) to addrspace(3); both pointers are align 2.
; Expected lowering: eight buffer_load_dword at offsets 0, 4, 8, 12, 16, 20,
; 23 and 27, stored with ds_write2_b64 plus ds_write_b64 at offsets 16 and 23.
; The loads at 23/27 overlap the one at 20 by one byte so that the copy ends
; at byte 30.
define void @memcpy_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(5) to addrspace(3); both pointers are align 2.
; Expected lowering: eight buffer_load_dword at offsets 0 through 28 in steps
; of 4, stored with two ds_write2_b64.
define void @memcpy_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(5) to addrspace(3); both pointers are align 8.
; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) followed by
; one ds_write2_b64.
define void @memcpy_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from addrspace(5) to addrspace(3); both pointers are align 8.
; Expected lowering: eight buffer_load_dword at offsets 0, 4, 8, 12, 15, 19,
; 23 and 27, stored with ds_write2_b64 and ds_write_b128 offset:15. The group
; starting at offset 15 overlaps the first 16 bytes by one byte so that
; exactly 31 bytes are covered.
define void @memcpy_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from addrspace(5) to addrspace(3); both pointers are align 8.
; Expected lowering: eight buffer_load_dword at offsets 0 through 28 in steps
; of 4, stored with two ds_write2_b64.
define void @memcpy_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from addrspace(5) to addrspace(3); both pointers are align 16.
; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) followed by
; one ds_write_b128.
define void @memcpy_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[2:5]
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(5) to addrspace(3); both pointers align 16.
; The assertions expect two 16-byte chunks: dword loads at offsets 0..12 and
; 15..27, stored with ds_write_b128 at offsets 0 and 15. The second chunk
; overlaps the first by one byte (byte 15) so that bytes 0..30 are covered.
define void @memcpy_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[2:5]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(5) to addrspace(3); both pointers align 16.
; The assertions expect eight dword buffer loads in one clause and two
; ds_write_b128 stores at offsets 0 and 16.
define void @memcpy_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p3_p5_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[2:5]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:16
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers align 1.
; The assertions expect a single flat_load_dwordx4 despite the byte alignment,
; followed by four dword buffer stores.
define void @memcpy_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers align 1.
; The assertions expect the source to be read as dwordx4 (offset 0), dwordx3
; (offset 16), ushort (offset 28) and ubyte (offset 30) flat loads, and the
; destination to be written with dword, short and byte buffer stores.
define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30
|
|
; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28
|
|
; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
|
|
; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
|
|
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers align 1.
; The assertions expect two flat_load_dwordx4 loads (offsets 16 and 0) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers align 2.
; The assertions expect a single flat_load_dwordx4 followed by four dword
; buffer stores.
define void @memcpy_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers align 2.
; The assertions expect the source to be read as dwordx4 (offset 0), dwordx3
; (offset 16), ushort (offset 28) and ubyte (offset 30) flat loads, and the
; destination to be written with dword, short and byte buffer stores.
define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30
|
|
; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28
|
|
; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
|
|
; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
|
|
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers align 2.
; The assertions expect two flat_load_dwordx4 loads (offsets 16 and 0) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers align 8.
; The assertions expect a single flat_load_dwordx4 followed by four dword
; buffer stores.
define void @memcpy_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers align 8.
; The assertions expect two flat_load_dwordx4 loads at offsets 0 and 15, stored
; as dwords at offsets 0..12 and 15..27. The second 16-byte chunk overlaps the
; first by one byte (byte 15) so that bytes 0..30 are covered.
define void @memcpy_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
|
|
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers align 8.
; The assertions expect two flat_load_dwordx4 loads (offsets 16 and 0) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers align 16.
; The assertions expect a single flat_load_dwordx4 followed by four dword
; buffer stores.
define void @memcpy_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers align 16.
; The assertions expect two flat_load_dwordx4 loads at offsets 0 and 15, stored
; as dwords at offsets 0..12 and 15..27. The second 16-byte chunk overlaps the
; first by one byte (byte 15) so that bytes 0..30 are covered.
define void @memcpy_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
|
|
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers align 16.
; The assertions expect two flat_load_dwordx4 loads (offsets 16 and 0) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p0_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
|
|
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers align 1.
; The assertions expect a single global_load_dwordx4 despite the byte
; alignment, followed by four dword buffer stores.
define void @memcpy_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers align 1.
; The assertions expect the source to be read as dwordx4 (offset 0), dwordx3
; (offset 16), ushort (offset 28) and ubyte (offset 30) global loads, and the
; destination to be written with dword, short and byte buffer stores.
define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
|
|
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
|
|
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers align 1.
; The assertions expect two global_load_dwordx4 loads (offsets 0 and 16) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz32_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers align 2.
; The assertions expect a single global_load_dwordx4 followed by four dword
; buffer stores.
define void @memcpy_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers align 2.
; The assertions expect the source to be read as dwordx4 (offset 0), dwordx3
; (offset 16), ushort (offset 28) and ubyte (offset 30) global loads, and the
; destination to be written with dword, short and byte buffer stores.
define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
|
|
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
|
|
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers align 2.
; The assertions expect two global_load_dwordx4 loads (offsets 0 and 16) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers align 8.
; The assertions expect a single global_load_dwordx4 followed by four dword
; buffer stores.
define void @memcpy_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers align 8.
; The assertions expect two global_load_dwordx4 loads at offsets 0 and 15,
; stored as dwords at offsets 0..12 and 15..27. The second 16-byte chunk
; overlaps the first by one byte (byte 15) so that bytes 0..30 are covered.
define void @memcpy_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers align 8.
; The assertions expect two global_load_dwordx4 loads (offsets 0 and 16) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers align 16.
; The assertions expect a single global_load_dwordx4 followed by four dword
; buffer stores.
define void @memcpy_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers align 16.
; The assertions expect two global_load_dwordx4 loads at offsets 0 and 15,
; stored as dwords at offsets 0..12 and 15..27. The second 16-byte chunk
; overlaps the first by one byte (byte 15) so that bytes 0..30 are covered.
define void @memcpy_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers align 16.
; The assertions expect two global_load_dwordx4 loads (offsets 0 and 16) in one
; clause, followed by eight dword buffer stores.
define void @memcpy_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p1_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x1
|
|
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
|
|
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 16 bytes from addrspace(3) to addrspace(5); both pointers align 1.
; The assertions expect a single ds_read2_b64 load followed by four dword
; buffer stores.
define void @memcpy_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p3_sz16_align_1_1:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
|
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy of 31 bytes from addrspace(3) to addrspace(5), both 1-byte aligned.
; Expected lowering: bytes 0-23 via ds_read2_b64 + ds_read_b64; the 7-byte tail
; via a dword (offset 24), a short (offset 28) and a byte (offset 30) access.
define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b32 v8, v1 offset:24
; CHECK-NEXT: ds_read_u16 v9, v1 offset:28
; CHECK-NEXT: ds_read_u8 v10, v1 offset:30
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(3) to addrspace(5), both 1-byte aligned.
; Expected lowering: two ds_read2_b64 (16 bytes each), then eight
; buffer_store_dword.
define void @memcpy_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(3) to addrspace(5), both 2-byte aligned.
; Expected lowering: one ds_read2_b64 covering all 16 bytes, then four
; buffer_store_dword.
define void @memcpy_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(3) to addrspace(5), both 2-byte aligned.
; Expected lowering: bytes 0-23 via ds_read2_b64 + ds_read_b64; the 7-byte tail
; via a dword (offset 24), a short (offset 28) and a byte (offset 30) access.
define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b32 v8, v1 offset:24
; CHECK-NEXT: ds_read_u16 v9, v1 offset:28
; CHECK-NEXT: ds_read_u8 v10, v1 offset:30
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(3) to addrspace(5), both 2-byte aligned.
; Expected lowering: two ds_read2_b64 (16 bytes each), then eight
; buffer_store_dword.
define void @memcpy_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(3) to addrspace(5), both 8-byte aligned.
; Expected lowering: one ds_read2_b64 covering all 16 bytes, then four
; buffer_store_dword.
define void @memcpy_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(3) to addrspace(5), both 8-byte aligned.
; Expected lowering: ds_read2_b64 for bytes 0-15 plus a ds_read_b128 at offset
; 15; the second 16-byte piece (stores at offsets 15-27) overlaps the first by
; one byte.
define void @memcpy_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(3) to addrspace(5), both 8-byte aligned.
; Expected lowering: two ds_read2_b64 (16 bytes each), then eight
; buffer_store_dword.
define void @memcpy_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(3) to addrspace(5), both 16-byte aligned.
; Expected lowering: a single ds_read_b128, then four buffer_store_dword.
define void @memcpy_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[1:4], v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(3) to addrspace(5), both 16-byte aligned.
; Expected lowering: ds_read_b128 at offset 0 and at offset 15; the second
; 16-byte piece (stores at offsets 15-27) overlaps the first by one byte.
define void @memcpy_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(3) to addrspace(5), both 16-byte aligned.
; Expected lowering: two ds_read_b128 (offsets 0 and 16), then eight
; buffer_store_dword.
define void @memcpy_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(4) to addrspace(5), both 1-byte aligned.
; Expected lowering: a single global_load_dwordx4, then four
; buffer_store_dword.
define void @memcpy_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(4) to addrspace(5), both 1-byte aligned.
; Expected lowering: global_load_dwordx4 (bytes 0-15), global_load_dwordx3
; (bytes 16-27), a ushort load at offset 28 and a ubyte load at offset 30,
; stored back as seven dwords, one short and one byte.
define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(4) to addrspace(5), both 1-byte aligned.
; Expected lowering: two global_load_dwordx4 grouped by s_clause, then eight
; buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(4) to addrspace(5), both 2-byte aligned.
; Expected lowering: a single global_load_dwordx4, then four
; buffer_store_dword.
define void @memcpy_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(4) to addrspace(5), both 2-byte aligned.
; Expected lowering: global_load_dwordx4 (bytes 0-15), global_load_dwordx3
; (bytes 16-27), a ushort load at offset 28 and a ubyte load at offset 30,
; stored back as seven dwords, one short and one byte.
define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(4) to addrspace(5), both 2-byte aligned.
; Expected lowering: two global_load_dwordx4 grouped by s_clause, then eight
; buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(4) to addrspace(5), both 8-byte aligned.
; Expected lowering: a single global_load_dwordx4, then four
; buffer_store_dword.
define void @memcpy_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(4) to addrspace(5), both 8-byte aligned.
; Expected lowering: global_load_dwordx4 at offset 0 and at offset 15; the
; second 16-byte piece (stores at offsets 15-27) overlaps the first by one
; byte.
define void @memcpy_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(4) to addrspace(5), both 8-byte aligned.
; Expected lowering: two global_load_dwordx4 grouped by s_clause, then eight
; buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes from addrspace(4) to addrspace(5), both 16-byte aligned.
; Expected lowering: a single global_load_dwordx4, then four
; buffer_store_dword.
define void @memcpy_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes from addrspace(4) to addrspace(5), both 16-byte aligned.
; Expected lowering: global_load_dwordx4 at offset 0 and at offset 15; the
; second 16-byte piece (stores at offsets 15-27) overlaps the first by one
; byte.
define void @memcpy_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes from addrspace(4) to addrspace(5), both 16-byte aligned.
; Expected lowering: two global_load_dwordx4 grouped by s_clause, then eight
; buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; memcpy of 16 bytes within addrspace(5), source and destination both 1-byte
; aligned.
; Expected lowering: four buffer_load_dword grouped by s_clause, each followed
; (after its own s_waitcnt) by the matching buffer_store_dword.
define void @memcpy_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
  ret void
}
|
|
|
|
; memcpy of 31 bytes within addrspace(5), source and destination both 1-byte
; aligned.
; Expected lowering: nine buffer loads grouped by s_clause (seven dwords, one
; ushort at offset 28, one ubyte at offset 30), each followed (after its own
; s_waitcnt) by the matching buffer store.
define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x8
; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(8)
; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
  ret void
}
|
|
|
|
; memcpy of 32 bytes within addrspace(5), source and destination both 1-byte
; aligned.
; Expected lowering: eight buffer_load_dword grouped by s_clause, each followed
; (after its own s_waitcnt) by the matching buffer_store_dword.
define void @memcpy_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
  ret void
}
|
|
|
|
; Copies 16 bytes from address space 5 to address space 5; both pointers are
; 2-byte aligned.
; The autogenerated assertions expect four buffer_load_dword instructions in
; one clause (offsets 0, 4, 8, 12), each paired with a buffer_store_dword to
; the same offset, i.e. dword-wide accesses despite the 2-byte alignment.
define void @memcpy_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz16_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from address space 5 to address space 5; both pointers are
; 2-byte aligned.
; The autogenerated assertions expect the odd size to be split into
; non-overlapping pieces: seven dwords covering bytes 0..27, one 16-bit
; access at offset 28 and one 8-bit access at offset 30 (28 + 2 + 1 = 31).
; All nine loads are issued as one clause and each store waits only for its
; own load through the descending vmcnt values.
define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x8
|
|
; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: s_waitcnt vmcnt(8)
|
|
; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(7)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(6)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from address space 5 to address space 5; both pointers are
; 2-byte aligned.
; The autogenerated assertions expect eight buffer_load_dword instructions in
; one clause (offsets 0..28), each paired with a buffer_store_dword to the
; same offset; the expected sequence is the same as for the align-1 variant
; of this size.
define void @memcpy_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz32_align_2_2:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(7)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(6)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from address space 5 to address space 5; both pointers are
; 8-byte aligned.
; The autogenerated assertions expect four buffer_load_dword instructions in
; one clause at ascending offsets 0, 4, 8, 12, each paired with a
; buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz16_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from address space 5 to address space 5; both pointers are
; 8-byte aligned.
; The autogenerated assertions expect the odd size to be covered by two
; overlapping 16-byte groups of dword accesses: offsets 0, 4, 8, 12 (bytes
; 0..15) and offsets 15, 19, 23, 27 (bytes 15..30). Byte 15 is copied twice,
; and the second group uses dword accesses at offsets that are not multiples
; of four.
define void @memcpy_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz31_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(7)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(6)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from address space 5 to address space 5; both pointers are
; 8-byte aligned.
; The autogenerated assertions expect eight buffer_load_dword instructions in
; one clause, the upper 16 bytes first (offsets 16..28) and then the lower
; 16 bytes (offsets 0..12), each paired with a buffer_store_dword to the same
; offset.
define void @memcpy_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz32_align_8_8:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(7)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(6)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 16 bytes from address space 5 to address space 5; both pointers are
; 16-byte aligned.
; The autogenerated assertions expect four buffer_load_dword instructions in
; one clause at ascending offsets 0, 4, 8, 12, each paired with a
; buffer_store_dword to the same offset; the expected sequence is the same
; as for the align-8 variant of this size.
define void @memcpy_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz16_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x3
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 31 bytes from address space 5 to address space 5; both pointers are
; 16-byte aligned.
; The autogenerated assertions expect the odd size to be covered by two
; overlapping 16-byte groups of dword accesses: offsets 0, 4, 8, 12 (bytes
; 0..15) and offsets 15, 19, 23, 27 (bytes 15..30). Byte 15 is copied twice;
; the expected sequence is the same as for the align-8 variant of this size.
define void @memcpy_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz31_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(7)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
|
|
; CHECK-NEXT: s_waitcnt vmcnt(6)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Copies 32 bytes from address space 5 to address space 5; both pointers are
; 16-byte aligned.
; The autogenerated assertions expect eight buffer_load_dword instructions in
; one clause, the upper 16 bytes first (offsets 16..28) and then the lower
; 16 bytes (offsets 0..12), each paired with a buffer_store_dword to the same
; offset; the expected sequence is the same as for the align-8 variant of
; this size.
define void @memcpy_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
|
|
; CHECK-LABEL: memcpy_p5_p5_sz32_align_16_16:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: s_clause 0x7
|
|
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
|
|
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_waitcnt vmcnt(7)
|
|
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
|
|
; CHECK-NEXT: s_waitcnt vmcnt(6)
|
|
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
|
|
; CHECK-NEXT: s_waitcnt vmcnt(5)
|
|
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
|
|
; CHECK-NEXT: s_waitcnt vmcnt(4)
|
|
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
|
|
; CHECK-NEXT: s_waitcnt vmcnt(3)
|
|
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
|
|
; CHECK-NEXT: s_waitcnt vmcnt(2)
|
|
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
|
|
; CHECK-NEXT: s_waitcnt vmcnt(1)
|
|
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
|
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the llvm.memcpy intrinsic, one per combination of
; destination address space (0, 1, 3, 5) and source address space
; (0, 1, 3, 4, 5). Address space 4 appears only as a source. All variants
; take an i64 length and an immediate i1 flag as their last two operands.
declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
|
|
declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
|
|
|
|
; Attribute group applied to the llvm.memcpy intrinsic declarations in this
; file. Those declarations reference group #2, so the group must be defined
; under that number; a reference to an undefined group is not diagnosed and
; the attributes would silently not be attached.
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
|
|
|