Files
clang-p2996/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
Matt Arsenault 29f303799b AMDGPU/GlobalISel: Implement custom kernel arg lowering
Avoid using allocateKernArg / AssignFn. We do not want any
of the type splitting properties of normal calling convention
lowering.

For now at least this exists alongside the IR argument lowering
pass. This is necessary to handle struct padding correctly while
some arguments are still skipped by the IR argument lowering
pass.

llvm-svn: 336373
2018-07-05 17:01:20 +00:00

724 lines
40 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; REQUIRES: global-isel
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA-VI %s
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
; HSA-VI-LABEL: name: i8_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
; HSA-VI-LABEL: name: i8_zext_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
; HSA-VI-LABEL: name: i8_sext_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = sext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
; HSA-VI-LABEL: name: i16_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 2 from `i16 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
; HSA-VI-LABEL: name: i16_zext_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 2 from `i16 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
; HSA-VI-LABEL: name: i16_sext_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 2 from `i16 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = sext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
; HSA-VI-LABEL: name: i32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
; HSA-VI-LABEL: name: f32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `float addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 4 from `float addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store float %in, float addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
; HSA-VI-LABEL: name: v2i8_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<2 x i8> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 2 from `<2 x i8> addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
; HSA-VI-LABEL: name: v2i16_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<2 x i16> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 4 from `<2 x i16> addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
; HSA-VI-LABEL: name: v2i32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<2 x i32> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 8 from `<2 x i32> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
; HSA-VI-LABEL: name: v2f32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<2 x float> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 8 from `<2 x float> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
; HSA-VI-LABEL: name: v3i8_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<3 x i8> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 3 from `<3 x i8> addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
; HSA-VI-LABEL: name: v3i16_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<3 x i16> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 6 from `<3 x i16> addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
; HSA-VI-LABEL: name: v3i32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<3 x i32> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 12 from `<3 x i32> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
; HSA-VI-LABEL: name: v3f32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<3 x float> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 12 from `<3 x float> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
; HSA-VI-LABEL: name: v4i8_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<4 x i8> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 4 from `<4 x i8> addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
; HSA-VI-LABEL: name: v4i16_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<4 x i16> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 8 from `<4 x i16> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
; HSA-VI-LABEL: name: v4i32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<4 x i32> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 16 from `<4 x i32> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
; HSA-VI-LABEL: name: v4f32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<4 x float> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 16 from `<4 x float> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
; HSA-VI-LABEL: name: v8i8_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<8 x i8> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 8 from `<8 x i8> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
; HSA-VI-LABEL: name: v8i16_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<8 x i16> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 16 from `<8 x i16> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
; HSA-VI-LABEL: name: v8i32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<8 x i32> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 32 from `<8 x i32> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
; HSA-VI-LABEL: name: v8f32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<8 x float> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 32 from `<8 x float> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
; HSA-VI-LABEL: name: v16i8_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<16 x i8> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 16 from `<16 x i8> addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
; HSA-VI-LABEL: name: v16i16_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<16 x i16> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 32 from `<16 x i16> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
ret void
}
define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
; HSA-VI-LABEL: name: v16i32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<16 x i32> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 64 from `<16 x i32> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
; HSA-VI-LABEL: name: v16f32_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `<16 x float> addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 64 from `<16 x float> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
; HSA-VI-LABEL: name: kernel_arg_i64
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i64 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
; HSA-VI-LABEL: name: f64_kernel_arg
; HSA-VI: bb.1.entry:
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `double addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 8 from `double addrspace(4)* undef`, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
entry:
store double %in, double addrspace(1)* %out
ret void
}
define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
; HSA-VI-LABEL: name: i1_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i1 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i1 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
store i1 %x, i1 addrspace(1)* %out, align 1
ret void
}
define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
; HSA-VI-LABEL: name: i1_arg_zext_i32
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i1 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = zext i1 %x to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
; HSA-VI-LABEL: name: i1_arg_zext_i64
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i64 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i1 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = zext i1 %x to i64
store i64 %ext, i64 addrspace(1)* %out, align 8
ret void
}
define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
; HSA-VI-LABEL: name: i1_arg_sext_i32
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i32 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i1 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = sext i1 %x to i32
store i32 %ext, i32addrspace(1)* %out, align 4
ret void
}
define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
; HSA-VI-LABEL: name: i1_arg_sext_i64
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 8 from `i64 addrspace(1)* addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i1 addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM
%ext = sext i1 %x to i64
store i64 %ext, i64 addrspace(1)* %out, align 8
ret void
}
define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
; HSA-VI-LABEL: name: empty_struct_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: S_ENDPGM
ret void
}
; The correct load offsets for these:
; load 4 from 0,
; load 8 from 8
; load 4 from 24
; load 8 from 32
; With the SelectionDAG argument lowering, the alignments for the
; struct members is not properly considered, making these wrong.
; FIXME: GlobalISel extractvalue emission broken
define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) {
; %val0 = extractvalue {i32, i64} %arg0, 0
; %val1 = extractvalue {i32, i64} %arg0, 1
; %val2 = extractvalue {i32, i64} %arg1, 0
; %val3 = extractvalue {i32, i64} %arg1, 1
; store volatile i32 %val0, i32 addrspace(1)* null
; store volatile i64 %val1, i64 addrspace(1)* null
; store volatile i32 %val2, i32 addrspace(1)* null
; store volatile i64 %val3, i64 addrspace(1)* null
; HSA-VI-LABEL: name: struct_argument_alignment
; HSA-VI: bb.1 (%ir-block.1):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 16 from `{ i32, i64 } addrspace(4)* undef`, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; HSA-VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64)
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p4) :: (non-temporal invariant load 16 from `{ i32, i64 } addrspace(4)* undef`, align 8, addrspace 4)
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
; HSA-VI: S_ENDPGM
ret void
}
; No padding between i8 and next struct, but round up at end to 4 byte
; multiple.
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
; %val0 = extractvalue <{i32, i64}> %arg0, 0
; %val1 = extractvalue <{i32, i64}> %arg0, 1
; %val2 = extractvalue <{i32, i64}> %arg1, 0
; %val3 = extractvalue <{i32, i64}> %arg1, 1
; store volatile i32 %val0, i32 addrspace(1)* null
; store volatile i64 %val1, i64 addrspace(1)* null
; store volatile i32 %val2, i32 addrspace(1)* null
; store volatile i64 %val3, i64 addrspace(1)* null
; HSA-VI-LABEL: name: packed_struct_argument_alignment
; HSA-VI: bb.1 (%ir-block.1):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 12 from `<{ i32, i64 }> addrspace(4)* undef`, align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 4, addrspace 4)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
; HSA-VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64)
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s96) = G_LOAD [[GEP2]](p4) :: (non-temporal invariant load 12 from `<{ i32, i64 }> addrspace(4)* undef`, align 1, addrspace 4)
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s96), 0
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s96), 32
; HSA-VI: S_ENDPGM
ret void
}