Currently the maximum representable alignment is 1GB; see D108661. Setting the alignment of an object to 4GB is desirable in some cases to guarantee that the lower 32 bits of its address are clear, which enables some optimizations, e.g. https://crbug.com/1016945. This uses an extra bit in instructions that carry an alignment. We can store 15 bits of "free" information, and with this change some instructions (e.g. AtomicCmpXchgInst) use 14 bits. We could increase the maximum representable alignment above 4GB (up to 2^62), since we're only using 33 of the 64 values, but I've limited it to 4GB for now. The one place where the bitcode format has to be updated is the alloca instruction: it stores its alignment in 5 bits of a 32-bit bitfield. I've added another field, which is 8 bits wide and should be future-proof for a while. For backward compatibility, we check whether the old field has a value and use that; otherwise we use the new field. Updating clang's maximum allowed alignment will come in a future patch. Reviewed By: hans Differential Revision: https://reviews.llvm.org/D110451
153 lines
6.2 KiB
LLVM
153 lines
6.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
|
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
|
|
; Module-level setup shared by every function in this test.
; The module is built for the nvptx64 target triple.
target triple = "nvptx64"
|
|
|
|
; Five-field struct type; pointers to it are the first parameter of the
; __kmpc_target_init, __kmpc_target_deinit and __kmpc_parallel_51 signatures
; used in this file.
%struct.ident_t = type { i32, i32, i32, i32, i8* }
|
|
|
|
; Three weak i8 constants named "*_exec_mode". Presumably each one pairs with a
; kernel defined later in the file; the kernel is named @none_spmd while this
; first global is @no_spmd_exec_mode -- TODO confirm whether that name
; mismatch is intentional.
@no_spmd_exec_mode = weak constant i8 1
|
|
@spmd_exec_mode = weak constant i8 0
|
|
@parallel_exec_mode = weak constant i8 0
|
|
; External i8 global that the helper functions store their parallel-level
; value into.
@G = external global i8
|
|
; Lists the three *_exec_mode constants in llvm.compiler.used.
@llvm.compiler.used = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata"
|
|
|
|
;.
|
|
; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
|
|
; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
|
|
; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
|
|
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
|
|
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x i8*] [i8* @no_spmd_exec_mode, i8* @spmd_exec_mode, i8* @parallel_exec_mode], section "llvm.metadata"
|
|
;.
|
|
; Kernel entry point (listed with "kernel" in !nvvm.annotations). Its
; __kmpc_target_init and __kmpc_target_deinit calls both pass i8 1 as the
; second argument -- presumably the non-SPMD mode, going by the function name;
; TODO confirm against the device runtime.
; The autogenerated assertions match the input body line for line, i.e. the
; openmp-opt run is expected to leave this function unchanged.
define weak void @none_spmd() {
|
|
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
|
|
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
|
|
; CHECK-NEXT: call void @none_spmd_helper()
|
|
; CHECK-NEXT: call void @mixed_helper()
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
|
|
; This is the only call to @none_spmd_helper visible in this test.
call void @none_spmd_helper()
|
|
; @mixed_helper is also called from @spmd.
call void @mixed_helper()
|
|
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Kernel entry point (listed with "kernel" in !nvvm.annotations). Its
; __kmpc_target_init and __kmpc_target_deinit calls both pass i8 2 as the
; second argument -- presumably the SPMD mode, going by the function name;
; TODO confirm against the device runtime.
; The autogenerated assertions match the input body line for line, i.e. the
; openmp-opt run is expected to leave this function unchanged.
define weak void @spmd() {
|
|
; CHECK-LABEL: define {{[^@]+}}@spmd() {
|
|
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
|
|
; CHECK-NEXT: call void @spmd_helper()
|
|
; CHECK-NEXT: call void @mixed_helper()
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
|
|
; @spmd_helper is also called from @parallel.
call void @spmd_helper()
|
|
; @mixed_helper is also called from @none_spmd.
call void @mixed_helper()
|
|
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Kernel entry point (listed with "kernel" in !nvvm.annotations) that passes
; i8 2 to __kmpc_target_init / __kmpc_target_deinit (the same value @spmd uses),
; calls @spmd_helper, and then calls the @__kmpc_parallel_51 defined in this
; file with all-null / all-zero arguments.
; Expected differences from the input: the null pointer arguments of the
; __kmpc_target_init and __kmpc_parallel_51 calls carry
; `align 4294967296` (2^32), and the __kmpc_parallel_51 arguments additionally
; carry noundef (plus noalias on the pointers). The __kmpc_target_deinit call
; is expected without any added attributes.
define weak void @parallel() {
|
|
; CHECK-LABEL: define {{[^@]+}}@parallel() {
|
|
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* align 4294967296 null, i8 2, i1 false, i1 false)
|
|
; CHECK-NEXT: call void @spmd_helper()
|
|
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noalias noundef align 4294967296 null, i32 noundef 0, i32 noundef 0, i32 noundef 0, i32 noundef 0, i8* noalias noundef align 4294967296 null, i8* noalias noundef align 4294967296 null, i8** noalias noundef align 4294967296 null, i64 noundef 0)
|
|
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
|
|
; @spmd_helper is also called from @spmd.
call void @spmd_helper()
|
|
; The input call site has no parameter attributes; they appear only in the
; expected output above.
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
|
|
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Internal helper called from both @none_spmd (which initializes with i8 1)
; and @spmd (which initializes with i8 2). It stores the result of
; __kmpc_parallel_level() into @G.
; The expected output keeps the runtime call; the only textual difference from
; the input is the explicit `align 1` on the store.
define internal void @mixed_helper() {
|
|
; CHECK-LABEL: define {{[^@]+}}@mixed_helper() {
|
|
; CHECK-NEXT: [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level()
|
|
; CHECK-NEXT: store i8 [[LEVEL]], i8* @G, align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%level = call i8 @__kmpc_parallel_level()
|
|
; Write the queried level to the external global @G.
store i8 %level, i8* @G
|
|
ret void
|
|
}
|
|
|
|
; Internal helper whose only visible caller is @none_spmd. It branches on
; whether __kmpc_parallel_level() returns 0: block t calls @foo, block f calls
; @bar.
; The expected output retains the runtime call, the compare and the
; conditional branch exactly as written in the input.
define internal void @none_spmd_helper() {
|
|
; CHECK-LABEL: define {{[^@]+}}@none_spmd_helper() {
|
|
; CHECK-NEXT: [[LEVEL12:%.*]] = call i8 @__kmpc_parallel_level()
|
|
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[LEVEL12]], 0
|
|
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
|
|
; CHECK: t:
|
|
; CHECK-NEXT: call void @foo()
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: f:
|
|
; CHECK-NEXT: call void @bar()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%level12 = call i8 @__kmpc_parallel_level()
|
|
; %c is true when the queried level equals 0.
%c = icmp eq i8 %level12, 0
|
|
br i1 %c, label %t, label %f
|
|
; Taken when the level is 0.
t:
|
|
call void @foo()
|
|
ret void
|
|
; Taken when the level is non-zero.
f:
|
|
call void @bar()
|
|
ret void
|
|
}
|
|
|
|
; Internal helper called from @spmd and @parallel, both of which pass i8 2 to
; __kmpc_target_init. The input body is the same as @mixed_helper's: query
; __kmpc_parallel_level() and store the result into @G.
; Unlike @mixed_helper, the expected output contains no runtime call at all:
; the store writes the constant i8 1, so this is the function where the
; parallel-level query is expected to be folded away.
define internal void @spmd_helper() {
|
|
; CHECK-LABEL: define {{[^@]+}}@spmd_helper() {
|
|
; CHECK-NEXT: store i8 1, i8* @G, align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; This call is absent from the expected output.
%level = call i8 @__kmpc_parallel_level()
|
|
store i8 %level, i8* @G
|
|
ret void
|
|
}
|
|
|
|
; Internal definition of @__kmpc_parallel_51 with nine unnamed parameters, none
; of which the body uses; the body only calls @parallel_helper. Its single
; visible call site is in @parallel, which passes null / zero for every
; argument.
; In the expected signature each pointer parameter gains
; `noalias nocapture nofree readnone align 4294967296` (2^32), and the function
; gains an attribute group that the end-of-file assertions pin to
; `{ alwaysinline }`.
define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) {
|
|
; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51
|
|
; CHECK-SAME: (%struct.ident_t* noalias nocapture nofree readnone align 4294967296 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[TMP5:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[TMP6:%.*]], i8** noalias nocapture nofree readnone align 4294967296 [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: call void @parallel_helper()
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; This is the only call to @parallel_helper visible in this test.
call void @parallel_helper()
|
|
ret void
|
|
}
|
|
|
|
; Internal helper whose only visible caller is @__kmpc_parallel_51. It stores
; the result of __kmpc_parallel_level() into @G.
; The expected output keeps the runtime call (it is not replaced by a
; constant); the only textual difference from the input is the explicit
; `align 1` on the store.
define internal void @parallel_helper() {
|
|
; CHECK-LABEL: define {{[^@]+}}@parallel_helper() {
|
|
; CHECK-NEXT: [[LEVEL:%.*]] = call i8 @__kmpc_parallel_level()
|
|
; CHECK-NEXT: store i8 [[LEVEL]], i8* @G, align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%level = call i8 @__kmpc_parallel_level()
|
|
; Write the queried level to the external global @G.
store i8 %level, i8* @G
|
|
ret void
|
|
}
|
|
|
|
; External declarations used by the functions in this test.
; @foo and @bar are the branch targets' callees in @none_spmd_helper.
declare void @foo()
|
|
declare void @bar()
|
|
; Queried by every *_helper function; returns an i8.
declare i8 @__kmpc_parallel_level()
|
|
; Called first in each kernel entry point. Both declarations below reference
; attribute group #1, but no `attributes #1 = ...` definition is visible in
; this file -- TODO confirm whether it was dropped or is intentionally empty.
declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext, i1 zeroext) #1
|
|
; Called last in each kernel entry point, just before `ret void`.
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext, i1 zeroext) #1
|
|
|
|
; Module flags: !0 and !1, defined below.
!llvm.module.flags = !{!0, !1}
|
|
; NVVM annotations: one entry per kernel entry point (!2, !3, !4).
!nvvm.annotations = !{!2, !3, !4}
|
|
|
|
; "openmp" and "openmp-device" module flags, both with value 50.
!0 = !{i32 7, !"openmp", i32 50}
|
|
!1 = !{i32 7, !"openmp-device", i32 50}
|
|
; Each of the following tags one function with the string "kernel" and i32 1:
; @none_spmd, @spmd and @parallel respectively.
!2 = !{void ()* @none_spmd, !"kernel", i32 1}
|
|
!3 = !{void ()* @spmd, !"kernel", i32 1}
|
|
!4 = !{void ()* @parallel, !"kernel", i32 1}
|
|
;.
|
|
; CHECK: attributes #[[ATTR0]] = { alwaysinline }
|
|
;.
|
|
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
|
|
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
|
|
; CHECK: [[META2:![0-9]+]] = !{void ()* @none_spmd, !"kernel", i32 1}
|
|
; CHECK: [[META3:![0-9]+]] = !{void ()* @spmd, !"kernel", i32 1}
|
|
; CHECK: [[META4:![0-9]+]] = !{void ()* @parallel, !"kernel", i32 1}
|
|
;.
|