This commit ensures that noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto their corresponding load from the kernel argument structure.
1866 lines
130 KiB
LLVM
1866 lines
130 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck -check-prefixes=GCN,HSA %s
; RUN: opt -mtriple=amdgcn-- -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck -check-prefixes=GCN,MESA %s

; The two run lines above exercise the amdgpu-lower-kernel-arguments pass once
; for the amdgcn-amd-amdhsa triple (GCN and HSA check prefixes) and once for
; the bare amdgcn-- triple (GCN and MESA check prefixes).

; "A5" makes address space 5 the alloca address space.
target datalayout = "A5"

; Variadic intrinsic; called by kern_range_noundef_i32 to keep its argument
; alive without storing it.
declare void @llvm.fake.use(...)
|
|
|
|
; Kernel with no arguments. Both run configurations expect the body to stay a
; bare `ret void`.
define amdgpu_kernel void @kern_noargs() {
; GCN-LABEL: @kern_noargs(
; GCN-NEXT: ret void
;
  ret void
}
|
|
|
|
; Single i8 argument. The checks expect a full i32 load from the aligned-down
; argument offset followed by a trunc to i8. The HSA run expects byte offset 0
; with align 16; the MESA run expects byte offset 36 with align 4.
define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
; HSA-LABEL: @kern_i8(
; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1:![0-9]+]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i8(
; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1:![0-9]+]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store i8 %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; Single i16 argument. The checks expect an i32 load from the aligned-down
; argument offset (0 for the HSA run, 36 for the MESA run) followed by a trunc
; to i16.
define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
; HSA-LABEL: @kern_i16(
; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i16(
; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store i16 %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; Single half argument. The checks expect an i32 load, a trunc to i16, and a
; bitcast from i16 to half before the value is stored.
define amdgpu_kernel void @kern_f16(half %arg) #0 {
; HSA-LABEL: @kern_f16(
; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT: store half [[ARG_LOAD]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_f16(
; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT: store half [[ARG_LOAD]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store half %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; i8 argument carrying the zeroext attribute. The expected output is the same
; load-i32-then-trunc sequence that the checks for the plain i8 kernel use; no
; extra metadata is expected on the load.
define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i8(
; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_zeroext_i8(
; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store i8 %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; i16 argument carrying the zeroext attribute. The checks expect an i32 load
; followed by a trunc to i16, with no extra metadata on the load.
define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i16(
; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_zeroext_i16(
; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store i16 %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; i8 argument carrying the signext attribute. The checks expect an i32 load
; followed by a trunc to i8, with no extra metadata on the load.
define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i8(
; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_signext_i8(
; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store i8 %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; i16 argument carrying the signext attribute. The checks expect an i32 load
; followed by a trunc to i16, with no extra metadata on the load.
define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i16(
; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_signext_i16(
; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store i16 %arg, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; Two i8 arguments. The checks expect each argument to get its own i32 load
; from the same aligned-down offset; the second argument is then extracted
; with `lshr ..., 8` before the trunc. The stores are volatile.
define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
; HSA-LABEL: @kern_i8_i8(
; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i8_i8(
; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef, align 1
  store volatile i8 %arg1, ptr addrspace(1) undef, align 1
  ret void
}
|
|
|
|
; <3 x i8> argument. The checks expect an i32 load, a trunc to i24, and a
; bitcast from i24 to <3 x i8>.
define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
; HSA-LABEL: @kern_v3i8(
; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v3i8(
; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store <3 x i8> %arg, ptr addrspace(1) undef, align 4
  ret void
}
|
|
|
|
; i24 argument. The checks expect an i32 load followed by a trunc to i24. The
; input store carries no explicit alignment; the checks expect `align 4` on
; the printed store.
define amdgpu_kernel void @kern_i24(i24 %arg0) {
; HSA-LABEL: @kern_i24(
; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT: store i24 [[TMP2]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i24(
; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT: store i24 [[TMP2]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store i24 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Plain i32 argument. The checks expect a direct i32 load from the argument
; offset (0 for the HSA run, 36 for the MESA run) carrying only
; !invariant.load metadata.
define amdgpu_kernel void @kern_i32(i32 %arg0) {
; HSA-LABEL: @kern_i32(
; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i32(
; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; i32 argument annotated with `noundef` and `range(i32 0, 8)`. The checks
; expect the generated kernarg load to carry !range and !noundef metadata in
; addition to !invariant.load. The value is consumed through llvm.fake.use
; rather than a store.
define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
; HSA-LABEL: @kern_range_noundef_i32(
; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_range_noundef_i32(
; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(i32 %arg0)
  ret void
}
|
|
|
|
; float argument. The checks expect a direct float load from the argument
; offset, with no intermediate integer load or cast.
define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store float [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_f32(
; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store float [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store float %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; <3 x i32> argument. The checks expect a <4 x i32> load followed by a
; shufflevector that keeps elements 0, 1 and 2.
define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
; HSA-LABEL: @kern_v3i32(
; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v3i32(
; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store <3 x i32> %arg0, ptr addrspace(1) undef, align 4
  ret void
}
|
|
|
|
; <8 x i32> argument. The checks expect the kernarg segment pointer to be
; `align 32 dereferenceable(288)` and the argument to be loaded directly as
; <8 x i32>.
define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
; HSA-LABEL: @kern_v8i32(
; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) undef, align 32
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v8i32(
; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) undef, align 32
; MESA-NEXT: ret void
;
  store <8 x i32> %arg, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; <8 x i64> argument. The checks expect the kernarg segment pointer to be
; `align 64 dereferenceable(320)` and the argument to be loaded directly as
; <8 x i64>.
define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
; HSA-LABEL: @kern_v8i64(
; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 64
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v8i64(
; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 64
; MESA-NEXT: ret void
;
  store <8 x i64> %arg, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; <16 x i64> argument. The checks expect the kernarg segment pointer to be
; `align 128 dereferenceable(384)` and the argument to be loaded directly as
; <16 x i64>.
define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
; HSA-LABEL: @kern_v16i64(
; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 128
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v16i64(
; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 128
; MESA-NEXT: ret void
;
  store <16 x i64> %arg, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; i32 argument followed by a <3 x i32> argument. The checks expect the i32 at
; byte offset 0 (HSA run) or 36 (MESA run) and the vector 16 bytes later, at
; offset 16 or 52. The vector is loaded as <4 x i32> and narrowed with a
; shufflevector.
define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
; HSA-LABEL: @kern_i32_v3i32(
; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i32_v3i32(
; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store i32 %arg0, ptr addrspace(1) undef
  store <3 x i32> %arg1, ptr addrspace(1) undef, align 4
  ret void
}
|
|
|
|
; Aggregate types passed by value to kern_struct_a and kern_struct_b_packed.
%struct.a = type { i32, i8, [4 x i8] }
; NOTE(review): despite the name, this type is declared with plain `{ ... }`
; rather than the packed `<{ ... }>` form -- confirm that is intended.
%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }
|
|
|
|
; %struct.a passed by value. The checks expect the whole struct to be loaded
; in one load from the argument offset and stored unchanged.
define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
; HSA-LABEL: @kern_struct_a(
; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_struct_a(
; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store %struct.a %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; %struct.b.packed passed by value. The checks expect the whole struct to be
; loaded in one load from the argument offset; the printed store is expected
; to be `align 16`.
define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
; HSA-LABEL: @kern_struct_b_packed(
; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 16
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_struct_b_packed(
; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 16
; MESA-NEXT: ret void
;
  store %struct.b.packed %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; i32 argument on a kernel using attribute group #1. The checks expect a much
; smaller kernarg segment than the other i32 tests, dereferenceable(48) for
; the HSA run and dereferenceable(44) for the MESA run.
; NOTE(review): #1 is defined outside this view; presumably it sets the
; implicit-argument byte count -- confirm against the attribute definition.
define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
; HSA-LABEL: @kern_implicit_arg_num_bytes(
; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_implicit_arg_num_bytes(
; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Unnamed, unused <16 x i32> argument followed by an i32. The checks expect
; only the i32 to be loaded, at byte offset 64 (HSA run) or 100 (MESA run),
; with the kernarg segment pointer `align 64` and dereferenceable(112) or
; dereferenceable(108) respectively.
define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
; HSA-LABEL: @kernel_implicitarg_no_struct_align(
; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kernel_implicitarg_no_struct_align(
; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store i32 %arg1, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; An addrspace(3) pointer argument used as the address of a store. The checks
; expect %lds to be replaced by a `load ptr addrspace(3)` from the kernarg
; segment (byte offset 0 for the HSA run, 36 for the MESA run) and the store
; to use the loaded pointer.
define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) #0 {
; HSA-LABEL: @kern_lds_ptr(
; HSA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_lds_ptr(
; MESA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4
; MESA-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) %lds, align 4
  ret void
}

; Same body as @kern_lds_ptr, but with attribute group #2. Both run lines share
; the GCN prefix here: the kernarg segment pointer call is still expected, but
; the store keeps using the original %lds argument and no load from the
; segment is checked.
; NOTE(review): #2 is defined outside this function; the `_si` suffix suggests
; it selects a Southern Islands target -- confirm.
define amdgpu_kernel void @kern_lds_ptr_si(ptr addrspace(3) %lds) #2 {
; GCN-LABEL: @kern_lds_ptr_si(
; GCN-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT:    store i32 0, ptr addrspace(3) [[LDS:%.*]], align 4
; GCN-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) %lds, align 4
  ret void
}

; Two i8 arguments that share one dword of the kernarg segment. Each argument
; is expected to be read with its own i32 load from the same aligned-down
; offset (0 for HSA, 36 for MESA); %arg0 is a plain trunc, %arg1 is shifted
; right by 8 bits before the trunc.
define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_i8(
; HSA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_i8(
; MESA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile i8 %arg1, ptr addrspace(1) undef
  ret void
}

; Three i8 arguments that share one dword of the kernarg segment. Each is
; expected to be read with an i32 load from the same aligned-down offset
; (0 for HSA, 36 for MESA) and extracted with a right shift of 0, 8 and 16
; bits respectively, followed by a trunc to i8.
define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
; HSA-LABEL: @kern_realign_i8_i8_i8(
; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8(
; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; MESA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile i8 %arg1, ptr addrspace(1) undef
  store volatile i8 %arg2, ptr addrspace(1) undef
  ret void
}

; Four i8 arguments that fill exactly one dword of the kernarg segment. Each is
; expected to be read with an i32 load from the same aligned-down offset
; (0 for HSA, 36 for MESA) and extracted with a right shift of 0, 8, 16 and 24
; bits respectively, followed by a trunc to i8.
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
; HSA-NEXT:    [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
; HSA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; MESA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
; MESA-NEXT:    [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
; MESA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile i8 %arg1, ptr addrspace(1) undef
  store volatile i8 %arg2, ptr addrspace(1) undef
  store volatile i8 %arg3, ptr addrspace(1) undef
  ret void
}

; An i8 followed by a <3 x i8> vector. The vector is expected to start in the
; next dword (byte offset 4 for HSA, 40 for MESA): it is loaded as an i32,
; truncated to i24 and bitcast to <3 x i8>. %arg0 is a plain trunc of the
; dword at offset 0 (HSA) / 36 (MESA).
define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_v3i8(
; HSA-NEXT:    [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
; HSA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile <3 x i8> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_v3i8(
; MESA-NEXT:    [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
; MESA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile <3 x i8> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile <3 x i8> %arg1, ptr addrspace(1) undef
  ret void
}

; An i8 followed by an i16 in the same dword of the kernarg segment. Both are
; expected to be read with an i32 load from the aligned-down offset (0 for HSA,
; 36 for MESA); %arg1 is extracted from the upper half with a right shift of
; 16 bits and a trunc to i16.
define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_i16(
; HSA-NEXT:    [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_i16(
; MESA-NEXT:    [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile i16 %arg1, ptr addrspace(1) undef
  ret void
}

; Two i1 arguments in the same dword of the kernarg segment. The checks expect
; each i1 to sit in its own byte: both are read with an i32 load from the
; aligned-down offset (0 for HSA, 36 for MESA), and %arg1 is shifted right by
; 8 bits before the trunc to i1.
define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
; HSA-LABEL: @kern_realign_i1_i1(
; HSA-NEXT:    [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
; HSA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i1_i1(
; MESA-NEXT:    [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
; MESA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i1 %arg0, ptr addrspace(1) undef
  store volatile i1 %arg1, ptr addrspace(1) undef
  ret void
}

; Three i1 arguments in the same dword of the kernarg segment. Each is expected
; to be read with an i32 load from the aligned-down offset (0 for HSA, 36 for
; MESA) and extracted with a right shift of 0, 8 and 16 bits respectively,
; followed by a trunc to i1.
define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
; HSA-LABEL: @kern_realign_i1_i1_i1(
; HSA-NEXT:    [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
; HSA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
; HSA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i1_i1_i1(
; MESA-NEXT:    [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
; MESA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; MESA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
; MESA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i1 %arg0, ptr addrspace(1) undef
  store volatile i1 %arg1, ptr addrspace(1) undef
  store volatile i1 %arg2, ptr addrspace(1) undef
  ret void
}

; Four i1 arguments in the same dword of the kernarg segment. Each is expected
; to be read with an i32 load from the aligned-down offset (0 for HSA, 36 for
; MESA) and extracted with a right shift of 0, 8, 16 and 24 bits respectively,
; followed by a trunc to i1.
define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
; HSA-NEXT:    [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
; HSA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
; HSA-NEXT:    [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
; HSA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
; HSA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i1 [[TMP11]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
; MESA-NEXT:    [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
; MESA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; MESA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
; MESA-NEXT:    [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
; MESA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
; MESA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i1 [[TMP11]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i1 %arg0, ptr addrspace(1) undef
  store volatile i1 %arg1, ptr addrspace(1) undef
  store volatile i1 %arg2, ptr addrspace(1) undef
  store volatile i1 %arg3, ptr addrspace(1) undef
  ret void
}

; An i1 followed by a <3 x i1> vector in the same dword of the kernarg segment.
; Both are expected to be read with an i32 load from the aligned-down offset
; (0 for HSA, 36 for MESA); the vector is extracted with a right shift of 8
; bits, a trunc to i3 and a bitcast to <3 x i1>.
define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
; HSA-LABEL: @kern_realign_i1_v3i1(
; HSA-NEXT:    [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3
; HSA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1>
; HSA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile <3 x i1> [[ARG1_LOAD]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i1_v3i1(
; MESA-NEXT:    [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3
; MESA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1>
; MESA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile <3 x i1> [[ARG1_LOAD]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i1 %arg0, ptr addrspace(1) undef
  store volatile <3 x i1> %arg1, ptr addrspace(1) undef
  ret void
}

; An i1 followed by an i16 in the same dword of the kernarg segment. Both are
; expected to be read with an i32 load from the aligned-down offset (0 for HSA,
; 36 for MESA); %arg1 is extracted from the upper half with a right shift of
; 16 bits and a trunc to i16.
define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
; HSA-LABEL: @kern_realign_i1_i16(
; HSA-NEXT:    [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; HSA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i1_i16(
; MESA-NEXT:    [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; MESA-NEXT:    store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2
; MESA-NEXT:    ret void
;
  store volatile i1 %arg0, ptr addrspace(1) undef
  store volatile i16 %arg1, ptr addrspace(1) undef
  ret void
}

; Eight consecutive i8 arguments. %arg0..%arg3 are expected to be extracted
; from one dword (kernarg offset 0 in the HSA run, 36 in the MESA run) using
; shifts of 0, 8, 16 and 24 bits; %arg5..%arg7 come from the next dword
; (offset 4 / 40) using shifts of 8, 16 and 24 bits.
; %arg4 is declared but never stored, and the checks contain no load for it.
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
|
|
; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
|
|
; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
|
|
; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
|
|
; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
|
|
; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP14]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP17]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP20]], ptr addrspace(1) undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
|
|
; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
|
|
; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
|
|
; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
|
|
; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
|
|
; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP14]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP17]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP20]], ptr addrspace(1) undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, ptr addrspace(1) undef
|
|
store volatile i8 %arg1, ptr addrspace(1) undef
|
|
store volatile i8 %arg2, ptr addrspace(1) undef
|
|
store volatile i8 %arg3, ptr addrspace(1) undef
|
|
store volatile i8 %arg5, ptr addrspace(1) undef
|
|
store volatile i8 %arg6, ptr addrspace(1) undef
|
|
store volatile i8 %arg7, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Two half arguments. The expected output loads the same dword twice (kernarg
; offset 0 in the HSA run, 36 in the MESA run), extracts each value as an i16
; (plain trunc for %arg0, lshr by 16 then trunc for %arg1) and bitcasts the
; i16 to half before it is stored.
define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_f16_f16(
|
|
; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
|
|
; HSA-NEXT: store volatile half [[ARG0_LOAD]], ptr addrspace(1) undef, align 2
|
|
; HSA-NEXT: store volatile half [[ARG1_LOAD]], ptr addrspace(1) undef, align 2
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_f16_f16(
|
|
; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
|
|
; MESA-NEXT: store volatile half [[ARG0_LOAD]], ptr addrspace(1) undef, align 2
|
|
; MESA-NEXT: store volatile half [[ARG1_LOAD]], ptr addrspace(1) undef, align 2
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile half %arg0, ptr addrspace(1) undef
|
|
store volatile half %arg1, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global (addrspace 1) pointer argument with no parameter attributes. The
; expected output loads it directly as a pointer from the kernarg segment
; (offset 0 in the HSA run, 36 in the MESA run); the load carries only
; !invariant.load metadata.
define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global pointer argument marked dereferenceable(42). The expected kernarg
; load carries a matching !dereferenceable metadata node in addition to
; !invariant.load.
define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) dereferenceable(42) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr_dereferencable(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
|
|
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr_dereferencable(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
|
|
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global pointer argument marked dereferenceable_or_null(128). The expected
; kernarg load carries a matching !dereferenceable_or_null metadata node in
; addition to !invariant.load.
define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(1) dereferenceable_or_null(128) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
|
|
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
|
|
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global pointer argument marked nonnull. The expected kernarg load carries
; !nonnull metadata in addition to !invariant.load; both reuse the same
; captured metadata node (META1).
define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr) #0 {
|
|
; HSA-LABEL: @kern_nonnull_global_ptr(
|
|
; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !nonnull [[META1]]
|
|
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_nonnull_global_ptr(
|
|
; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !nonnull [[META1]]
|
|
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global pointer argument with an alignment attribute. The expected kernarg
; load carries a matching !align metadata node in addition to !invariant.load.
; Despite the "align32" in the function name, the attribute on the argument
; is align 1024.
define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %ptr) #0 {
|
|
; HSA-LABEL: @kern_align32_global_ptr(
|
|
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
|
|
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_align32_global_ptr(
|
|
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
|
|
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global pointer argument marked noalias. Both run lines share the GCN
; checks: only the kernarg segment pointer call is expected, with no kernarg
; load, and the store still uses the original argument (captured as PTR).
; NOTE(review): presumably the pass deliberately leaves noalias arguments
; unlowered - confirm against the pass implementation.
define amdgpu_kernel void @kern_noalias_global_ptr(ptr addrspace(1) noalias %ptr) #0 {
|
|
; GCN-LABEL: @kern_noalias_global_ptr(
|
|
; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR:%.*]], ptr addrspace(1) undef, align 8
|
|
; GCN-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Two noalias global pointer arguments. As in the single-argument noalias
; test, the shared GCN checks expect only the kernarg segment pointer call
; (here dereferenceable(272)) and stores of the original arguments (captured
; as PTR0 and PTR1), with no kernarg loads.
define amdgpu_kernel void @kern_noalias_global_ptr_x2(ptr addrspace(1) noalias %ptr0, ptr addrspace(1) noalias %ptr1) #0 {
|
|
; GCN-LABEL: @kern_noalias_global_ptr_x2(
|
|
; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR0:%.*]], ptr addrspace(1) undef, align 8
|
|
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(1) undef, align 8
|
|
; GCN-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr0, ptr addrspace(1) undef
|
|
store volatile ptr addrspace(1) %ptr1, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A global pointer argument marked noundef. The expected kernarg load carries
; !noundef metadata in addition to !invariant.load; both reuse the same
; captured metadata node (META1). The loaded pointer is stored to a null
; address here, where the other pointer tests in this group store to undef.
define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr) #0 {
|
|
; HSA-LABEL: @kern_noundef_global_ptr(
|
|
; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
|
|
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noundef_global_ptr(
|
|
; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
|
|
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile ptr addrspace(1) %ptr, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; A by-value { i8, i8 } struct argument. The expected output loads the whole
; aggregate with a single load from the kernarg segment (offset 0 in the HSA
; run, 36 in the MESA run) and the extractvalue instructions then operate on
; the loaded value.
define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
|
|
; HSA-LABEL: @struct_i8_i8_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @struct_i8_i8_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue {i8, i8} %in, 0
|
|
%elt1 = extractvalue {i8, i8} %in, 1
|
|
store volatile i8 %elt0, ptr addrspace(1) null, align 4
|
|
store volatile i8 %elt1, ptr addrspace(1) null, align 4
|
|
ret void
|
|
}
|
|
|
|
; A by-value { i8, i16 } struct argument (members of different widths). The
; expected output loads the whole aggregate with a single load from the
; kernarg segment (offset 0 in the HSA run, 36 in the MESA run) and extracts
; the two members from the loaded value.
define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
|
|
; HSA-LABEL: @struct_i8_i16_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: store volatile i16 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @struct_i8_i16_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: store volatile i16 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue {i8, i16} %in, 0
|
|
%elt1 = extractvalue {i8, i16} %in, 1
|
|
store volatile i8 %elt0, ptr addrspace(1) null, align 4
|
|
store volatile i16 %elt1, ptr addrspace(1) null, align 4
|
|
ret void
|
|
}
|
|
|
|
; A by-value [2 x i8] array argument. The expected output loads the whole
; array with a single load from the kernarg segment (offset 0 in the HSA run,
; 36 in the MESA run) and extracts both elements from the loaded value.
define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
|
|
; HSA-LABEL: @array_2xi8_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @array_2xi8_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue [2 x i8] %in, 0
|
|
%elt1 = extractvalue [2 x i8] %in, 1
|
|
store volatile i8 %elt0, ptr addrspace(1) null, align 4
|
|
store volatile i8 %elt1, ptr addrspace(1) null, align 4
|
|
ret void
|
|
}
|
|
|
|
; A by-value [2 x i1] array argument. The expected output loads the whole
; array with a single [2 x i1] load from the kernarg segment (offset 0 in the
; HSA run, 36 in the MESA run) and extracts both i1 elements from the loaded
; value.
define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
|
|
; HSA-LABEL: @array_2xi1_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: store volatile i1 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @array_2xi1_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: store volatile i1 [[ELT1]], ptr addrspace(1) null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue [2 x i1] %in, 0
|
|
%elt1 = extractvalue [2 x i1] %in, 1
|
|
store volatile i1 %elt0, ptr addrspace(1) null, align 4
|
|
store volatile i1 %elt1, ptr addrspace(1) null, align 4
|
|
ret void
|
|
}
|
|
|
|
; A kernel whose only argument is an empty struct. Both run lines share the
; GCN checks: only the kernarg segment pointer call is expected, with
; dereferenceable(256), and no kernarg loads.
define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
|
|
; GCN-LABEL: @only_empty_struct(
|
|
; GCN-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; GCN-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
; An empty struct argument followed by an i32. The checks expect the i32 to
; be loaded from the first explicit kernarg offset (0 in the HSA run, 36 in
; the MESA run), i.e. the empty struct occupies no space before it.
define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
|
|
; HSA-LABEL: @empty_struct_with_other(
|
|
; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @empty_struct_with_other(
|
|
; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg1, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Should insert code after the allocas: the checks expect the static alloca
; to remain the first instruction, followed by the kernarg segment pointer
; call and the load of %arg0, which is then stored into the alloca.
|
|
define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
|
|
; HSA-LABEL: @static_alloca_kern_i32(
|
|
; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
|
|
; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @static_alloca_kern_i32(
|
|
; MESA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
|
|
; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
%alloca = alloca i32, addrspace(5)
|
|
store volatile i32 %arg0, ptr addrspace(5) %alloca
|
|
ret void
|
|
}
|
|
|
|
; Make sure we don't break the IR if an alloca depends on the
|
|
; kernargs. The checks expect the static alloca to stay first, then the
; kernarg segment pointer call and the load of %n, and only after that the
; dynamically sized alloca that uses the loaded value as its element count.
|
|
define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
|
|
; HSA-LABEL: @dyn_alloca_kernarg_i32(
|
|
; HSA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
|
|
; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
|
|
; HSA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
|
|
; HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4
|
|
; HSA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @dyn_alloca_kernarg_i32(
|
|
; MESA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
|
|
; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
|
|
; MESA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
|
|
; MESA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4
|
|
; MESA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
%alloca0 = alloca i32, addrspace(5)
|
|
%alloca1 = alloca i32, i32 %n, addrspace(5)
|
|
store volatile i32 0, ptr addrspace(5) %alloca0
|
|
store volatile i32 1, ptr addrspace(5) %alloca1
|
|
ret void
|
|
}
|
|
|
|
; Byref pointer arguments should only be treated as offsets into the kernarg segment.
|
|
; byref(i8) in the constant address space: the argument becomes a plain
; getelementptr into the kernarg segment (explicit offset 8, right after %out)
; and the existing i8 load reads through that address.
define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) {
; HSA-LABEL: @byref_constant_i8_arg(
; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1
; HSA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32
; HSA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i8_arg(
; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1
; MESA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32
; MESA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i8, ptr addrspace(4) %in.byref
  %ext = zext i8 %in to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i16) in the constant address space: same shape as the i8 case, with
; the user load keeping its natural 2-byte alignment.
define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) {
; HSA-LABEL: @byref_constant_i16_arg(
; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2
; HSA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32
; HSA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i16_arg(
; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2
; MESA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32
; MESA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i16, ptr addrspace(4) %in.byref
  %ext = zext i16 %in to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i32) followed by a by-value i32: the byref slot sits at explicit
; offset 8 and %after.offset directly after it at 12. The kernarg load for
; %after.offset is emitted ahead of the user load through the byref address.
define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_constant_i32_arg(
; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12
; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i32_arg(
; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48
; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(4) %in.byref
  store volatile i32 %in, ptr addrspace(1) %out, align 4
  store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(<4 x i32>): the 16-byte vector slot is placed at explicit offset 16
; (not 8, directly after %out), and %after.offset follows it at 32.
define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_constant_v4i32_arg(
; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(296) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 16
; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 32
; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16
; HSA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_v4i32_arg(
; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(292) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 52
; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 68
; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16
; MESA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load <4 x i32>, ptr addrspace(4) %in.byref
  store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4
  store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i32) with an explicit align(256): the byref slot moves to explicit
; offset 256, %after.offset follows at 260, and the kernarg segment pointer
; call itself is annotated align 256.
define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_align_constant_i32_arg(
; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 256
; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 260
; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_align_constant_i32_arg(
; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 292
; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 296
; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 8, !invariant.load [[META1]]
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(4) %in.byref
  store volatile i32 %in, ptr addrspace(1) %out, align 4
  store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(<16 x i32>) without an explicit align, preceded by an unnamed i8
; argument: the byref slot lands at explicit offset 64, %after.offset at 128,
; and the kernarg segment pointer call is annotated align 64.
define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_natural_align_constant_v16i32_arg(
; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(392) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 64
; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 128
; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64
; HSA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_natural_align_constant_v16i32_arg(
; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(388) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 100
; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 164
; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64
; MESA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load <16 x i32>, ptr addrspace(4) %in.byref
  store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4
  store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; Also accept byref kernel arguments with other global address spaces.
|
|
; byref(i32) declared in addrspace(1): the kernarg GEP is still computed in
; addrspace(4) and then addrspacecast to addrspace(1) for the user load.
define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) {
; HSA-LABEL: @byref_global_i32_arg(
; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_global_i32_arg(
; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1)
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(1) %in.byref
  store i32 %in, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i32) declared as a plain `ptr` (default address space): the kernarg
; GEP in addrspace(4) is addrspacecast to `ptr` before the user load.
define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) %in.byref) {
; HSA-LABEL: @byref_flat_i32_arg(
; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr
; HSA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4
; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_flat_i32_arg(
; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr
; MESA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4
; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr %in.byref
  store i32 %in, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i32) declared in addrspace(6): the kernarg GEP in addrspace(4) is
; addrspacecast to addrspace(6) before the user load.
define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) %in.byref) {
; HSA-LABEL: @byref_constant_32bit_i32_arg(
; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6)
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4
; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_32bit_i32_arg(
; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6)
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4
; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(6) %in.byref
  store i32 %in, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i32) declared in addrspace(999): handled like the other non-constant
; address spaces, via an addrspacecast of the addrspace(4) kernarg GEP.
define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) %in.byref) {
; HSA-LABEL: @byref_unknown_as_i32_arg(
; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999)
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4
; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_unknown_as_i32_arg(
; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999)
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4
; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(999) %in.byref
  store i32 %in, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; A byref argument in addrspace(3) is invalid, but the pass should not crash on it.
|
|
; byref(i32) declared in addrspace(3): the expected output is the same
; GEP + addrspacecast + load sequence as for the other address spaces.
define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) %in.byref) {
; HSA-LABEL: @byref_local_i32_arg(
; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3)
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_local_i32_arg(
; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3)
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(3) %in.byref
  store i32 %in, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; Two consecutive byref(i32) arguments followed by a by-value i32: the byref
; slots are packed at explicit offsets 8 and 12, %after.offset follows at 16,
; and each byref argument gets its own kernarg GEP.
define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) {
; HSA-LABEL: @multi_byref_constant_i32_arg(
; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(280) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12
; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 16
; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; HSA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; HSA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: store volatile i32 [[IN1]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @multi_byref_constant_i32_arg(
; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(276) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48
; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 52
; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; MESA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; MESA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: store volatile i32 [[IN1]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in0 = load i32, ptr addrspace(4) %in0.byref
  %in1 = load i32, ptr addrspace(4) %in1.byref
  store volatile i32 %in0, ptr addrspace(1) %out, align 4
  store volatile i32 %in1, ptr addrspace(1) %out, align 4
  store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; byref(i32) as the only kernel argument: its slot is at explicit offset 0,
; so the GEP uses the base kernarg offset (0 in the amdhsa run, 36 in the
; amdgcn-- run).
define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) {
; HSA-LABEL: @byref_constant_i32_arg_offset0(
; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i32_arg_offset0(
; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4
; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  %in = load i32, ptr addrspace(4) %in.byref
  store i32 %in, ptr addrspace(1) undef, align 4
  ret void
}
|
|
|
|
; noundef on a float kernel argument.
;
; The checks expect the load of %arg0 from the kernarg segment to carry
; !noundef in addition to !invariant.load. The load is expected at byte
; offset 0 with align 16 in the HSA run and at offset 36 with align 4 in the
; MESA run. The @llvm.fake.use call gives %arg0 a user (presumably so that the
; argument load is actually emitted).
define amdgpu_kernel void @noundef_f32(float noundef %arg0) {
; HSA-LABEL: @noundef_f32(
; HSA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_f32(
; MESA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(float %arg0)
  ret void
}
|
|
|
|
; noundef on a half (16-bit) kernel argument.
;
; The checks expect the argument to be fetched with an i32 load from the
; aligned-down kernarg offset, then truncated to i16 and bitcast to half.
; The !noundef metadata is expected on that i32 load, next to
; !invariant.load. Offsets and alignment follow the other noundef tests:
; offset 0 / align 16 in the HSA run, offset 36 / align 4 in the MESA run.
define amdgpu_kernel void @noundef_f16(half noundef %arg0) {
; HSA-LABEL: @noundef_f16(
; HSA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_f16(
; MESA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(half %arg0)
  ret void
}
|
|
|
|
; noundef on a <2 x i32> vector kernel argument.
;
; The checks expect a single <2 x i32> load from the kernarg segment that
; carries both !invariant.load and !noundef. The kernarg segment is expected
; to be dereferenceable(264) in both runs; the load sits at offset 0 / align 16
; in the HSA run and offset 36 / align 4 in the MESA run.
define amdgpu_kernel void @noundef_v2i32(<2 x i32> noundef %arg0) {
; HSA-LABEL: @noundef_v2i32(
; HSA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_v2i32(
; MESA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(<2 x i32> %arg0)
  ret void
}
|
|
|
|
; noundef on a pointer kernel argument (plain `ptr`, i.e. address space 0).
;
; The checks expect the pointer to be loaded from the kernarg segment with
; both !invariant.load and !noundef attached. No other pointer-specific
; metadata appears in the expected load. Offsets and alignment: offset 0 /
; align 16 in the HSA run, offset 36 / align 4 in the MESA run.
define amdgpu_kernel void @noundef_p0(ptr noundef %arg0) {
; HSA-LABEL: @noundef_p0(
; HSA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_p0(
; MESA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(ptr %arg0)
  ret void
}
|
|
|
|
; noundef on a <2 x ptr> (vector of pointers) kernel argument.
;
; The checks expect a single <2 x ptr> load from the kernarg segment carrying
; both !invariant.load and !noundef. The kernarg segment is expected to be
; dereferenceable(272) in both runs; the load sits at offset 0 / align 16 in
; the HSA run and offset 36 / align 4 in the MESA run.
define amdgpu_kernel void @noundef_v2p0(<2 x ptr> noundef %arg0) {
; HSA-LABEL: @noundef_v2p0(
; HSA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_v2p0(
; MESA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(<2 x ptr> %arg0)
  ret void
}
|
|
|
|
; Attribute groups referenced by kernels in this file (#0 is attached to
; @kern_i8 near the top). #1 additionally sets the
; "amdgpu-implicitarg-num-bytes" string attribute to 40; #2 selects the
; "tahiti" target CPU instead of "kaveri".
attributes #0 = { nounwind "target-cpu"="kaveri" }
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
attributes #2 = { nounwind "target-cpu"="tahiti" }

; Module flag recording amdhsa_code_object_version = 500.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}

; Everything below is the global (attribute and metadata) check section that
; update_test_checks.py emits for --check-globals; do not edit it by hand.
; The RNG2 and META3..META5 variables are not bound anywhere in this part of
; the file, so they are presumably captured by checks earlier on.
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
; HSA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
; HSA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
; MESA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
; MESA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; HSA: [[META1]] = !{}
; HSA: [[RNG2]] = !{i32 0, i32 8}
; HSA: [[META3]] = !{i64 42}
; HSA: [[META4]] = !{i64 128}
; HSA: [[META5]] = !{i64 1024}
;.
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; MESA: [[META1]] = !{}
; MESA: [[RNG2]] = !{i32 0, i32 8}
; MESA: [[META3]] = !{i64 42}
; MESA: [[META4]] = !{i64 128}
; MESA: [[META5]] = !{i64 1024}
;.
|