[HIP][HIPSTDPAR] Re-work allocation interposition for hipstdpar (#138790)
The allocation interposition mode had a number of issues, which are primarily addressed in the library component via <https://github.com/ROCm/rocThrust/pull/543>. However, some additional symbols also need to be interposed, which this patch does. Furthermore, to implement this in a backward-compatible way, the new implementation is guarded by a V1 macro, `__HIPSTDPAR_INTERPOSE_ALLOC_V1__`, which is defined in addition to the existing `__HIPSTDPAR_INTERPOSE_ALLOC__` one.
This commit is contained in:
@@ -518,7 +518,7 @@ Predefined Macros
|
||||
* - ``__HIPSTDPAR__``
|
||||
- Defined when Clang is compiling code in algorithm offload mode, enabled
|
||||
with the ``--hipstdpar`` compiler option.
|
||||
* - ``__HIPSTDPAR_INTERPOSE_ALLOC__``
|
||||
* - ``__HIPSTDPAR_INTERPOSE_ALLOC__`` / ``__HIPSTDPAR_INTERPOSE_ALLOC_V1__``
|
||||
- Defined only when compiling in algorithm offload mode, when the user
|
||||
enables interposition mode with the ``--hipstdpar-interpose-alloc``
|
||||
compiler option, indicating that all dynamic memory allocation /
|
||||
|
||||
@@ -618,8 +618,10 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
|
||||
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
|
||||
if (LangOpts.HIPStdPar) {
|
||||
Builder.defineMacro("__HIPSTDPAR__");
|
||||
if (LangOpts.HIPStdParInterposeAlloc)
|
||||
if (LangOpts.HIPStdParInterposeAlloc) {
|
||||
Builder.defineMacro("__HIPSTDPAR_INTERPOSE_ALLOC__");
|
||||
Builder.defineMacro("__HIPSTDPAR_INTERPOSE_ALLOC_V1__");
|
||||
}
|
||||
}
|
||||
if (LangOpts.CUDAIsDevice) {
|
||||
Builder.defineMacro("__HIP_DEVICE_COMPILE__");
|
||||
|
||||
@@ -304,11 +304,13 @@
|
||||
// RUN: %clang_cc1 %s -E -dM -o - -x hip --hipstdpar -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIPSTDPAR
|
||||
// CHECK-HIPSTDPAR: #define __HIPSTDPAR__ 1
|
||||
// CHECK-HIPSTDPAR-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC_V1__ 1
|
||||
// CHECK-HIPSTDPAR-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
|
||||
|
||||
// RUN: %clang_cc1 %s -E -dM -o - -x hip --hipstdpar --hipstdpar-interpose-alloc \
|
||||
// RUN: -triple x86_64-unknown-linux-gnu | FileCheck -match-full-lines %s \
|
||||
// RUN: --check-prefix=CHECK-HIPSTDPAR-INTERPOSE
|
||||
// CHECK-HIPSTDPAR-INTERPOSE: #define __HIPSTDPAR_INTERPOSE_ALLOC_V1__ 1
|
||||
// CHECK-HIPSTDPAR-INTERPOSE: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
|
||||
// CHECK-HIPSTDPAR-INTERPOSE: #define __HIPSTDPAR__ 1
|
||||
|
||||
@@ -316,4 +318,5 @@
|
||||
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device | FileCheck -match-full-lines \
|
||||
// RUN: %s --check-prefix=CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG
|
||||
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG: #define __HIPSTDPAR__ 1
|
||||
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC_V1__ 1
|
||||
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
|
||||
|
||||
@@ -231,45 +231,55 @@ PreservedAnalyses
|
||||
}
|
||||
|
||||
static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
|
||||
{"aligned_alloc", "__hipstdpar_aligned_alloc"},
|
||||
{"calloc", "__hipstdpar_calloc"},
|
||||
{"free", "__hipstdpar_free"},
|
||||
{"malloc", "__hipstdpar_malloc"},
|
||||
{"memalign", "__hipstdpar_aligned_alloc"},
|
||||
{"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
|
||||
{"realloc", "__hipstdpar_realloc"},
|
||||
{"reallocarray", "__hipstdpar_realloc_array"},
|
||||
{"_ZdaPv", "__hipstdpar_operator_delete"},
|
||||
{"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
|
||||
{"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
|
||||
{"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
|
||||
{"_ZdlPv", "__hipstdpar_operator_delete"},
|
||||
{"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
|
||||
{"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
|
||||
{"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
|
||||
{"_Znam", "__hipstdpar_operator_new"},
|
||||
{"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
|
||||
{"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
|
||||
{"_ZnamSt11align_val_tRKSt9nothrow_t",
|
||||
"__hipstdpar_operator_new_aligned_nothrow"},
|
||||
{"aligned_alloc", "__hipstdpar_aligned_alloc"},
|
||||
{"calloc", "__hipstdpar_calloc"},
|
||||
{"free", "__hipstdpar_free"},
|
||||
{"malloc", "__hipstdpar_malloc"},
|
||||
{"memalign", "__hipstdpar_aligned_alloc"},
|
||||
{"mmap", "__hipstdpar_mmap"},
|
||||
{"munmap", "__hipstdpar_munmap"},
|
||||
{"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
|
||||
{"realloc", "__hipstdpar_realloc"},
|
||||
{"reallocarray", "__hipstdpar_realloc_array"},
|
||||
{"_ZdaPv", "__hipstdpar_operator_delete"},
|
||||
{"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
|
||||
{"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
|
||||
{"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
|
||||
{"_ZdlPv", "__hipstdpar_operator_delete"},
|
||||
{"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
|
||||
{"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
|
||||
{"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
|
||||
{"_Znam", "__hipstdpar_operator_new"},
|
||||
{"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
|
||||
{"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
|
||||
{"_ZnamSt11align_val_tRKSt9nothrow_t",
|
||||
"__hipstdpar_operator_new_aligned_nothrow"},
|
||||
|
||||
{"_Znwm", "__hipstdpar_operator_new"},
|
||||
{"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
|
||||
{"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
|
||||
{"_ZnwmSt11align_val_tRKSt9nothrow_t",
|
||||
"__hipstdpar_operator_new_aligned_nothrow"},
|
||||
{"__builtin_calloc", "__hipstdpar_calloc"},
|
||||
{"__builtin_free", "__hipstdpar_free"},
|
||||
{"__builtin_malloc", "__hipstdpar_malloc"},
|
||||
{"__builtin_operator_delete", "__hipstdpar_operator_delete"},
|
||||
{"__builtin_operator_new", "__hipstdpar_operator_new"},
|
||||
{"__builtin_realloc", "__hipstdpar_realloc"},
|
||||
{"__libc_calloc", "__hipstdpar_calloc"},
|
||||
{"__libc_free", "__hipstdpar_free"},
|
||||
{"__libc_malloc", "__hipstdpar_malloc"},
|
||||
{"__libc_memalign", "__hipstdpar_aligned_alloc"},
|
||||
{"__libc_realloc", "__hipstdpar_realloc"}
|
||||
};
|
||||
{"_Znwm", "__hipstdpar_operator_new"},
|
||||
{"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
|
||||
{"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
|
||||
{"_ZnwmSt11align_val_tRKSt9nothrow_t",
|
||||
"__hipstdpar_operator_new_aligned_nothrow"},
|
||||
{"__builtin_calloc", "__hipstdpar_calloc"},
|
||||
{"__builtin_free", "__hipstdpar_free"},
|
||||
{"__builtin_malloc", "__hipstdpar_malloc"},
|
||||
{"__builtin_operator_delete", "__hipstdpar_operator_delete"},
|
||||
{"__builtin_operator_new", "__hipstdpar_operator_new"},
|
||||
{"__builtin_realloc", "__hipstdpar_realloc"},
|
||||
{"__libc_calloc", "__hipstdpar_calloc"},
|
||||
{"__libc_free", "__hipstdpar_free"},
|
||||
{"__libc_malloc", "__hipstdpar_malloc"},
|
||||
{"__libc_memalign", "__hipstdpar_aligned_alloc"},
|
||||
{"__libc_realloc", "__hipstdpar_realloc"}};
|
||||
|
||||
// Table of (placeholder, target) symbol-name pairs consumed by
// HipStdParAllocationInterpositionPass::run: for every `first` name found in
// the module, all uses are redirected to the function named `second`
// (declared on demand with the placeholder's type and attributes), and the
// placeholder is then erased from the module.
static constexpr std::pair<StringLiteral, StringLiteral> HiddenMap[]{
|
||||
// hidden_malloc and hidden_free are only kept for backwards compatibility /
|
||||
// legacy purposes, and we should remove them in the future
|
||||
{"__hipstdpar_hidden_malloc", "__libc_malloc"},
|
||||
{"__hipstdpar_hidden_free", "__libc_free"},
|
||||
{"__hipstdpar_hidden_memalign", "__libc_memalign"},
|
||||
{"__hipstdpar_hidden_mmap", "mmap"},
|
||||
{"__hipstdpar_hidden_munmap", "munmap"}};
|
||||
|
||||
PreservedAnalyses
|
||||
HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
|
||||
@@ -299,19 +309,14 @@ HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
|
||||
}
|
||||
}
|
||||
|
||||
if (auto F = M.getFunction("__hipstdpar_hidden_malloc")) {
|
||||
auto LibcMalloc = M.getOrInsertFunction(
|
||||
"__libc_malloc", F->getFunctionType(), F->getAttributes());
|
||||
F->replaceAllUsesWith(LibcMalloc.getCallee());
|
||||
for (auto &&HR : HiddenMap) {
|
||||
if (auto F = M.getFunction(HR.first)) {
|
||||
auto R = M.getOrInsertFunction(HR.second, F->getFunctionType(),
|
||||
F->getAttributes());
|
||||
F->replaceAllUsesWith(R.getCallee());
|
||||
|
||||
eraseFromModule(*F);
|
||||
}
|
||||
if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
|
||||
auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
|
||||
F->getAttributes());
|
||||
F->replaceAllUsesWith(LibcFree.getCallee());
|
||||
|
||||
eraseFromModule(*F);
|
||||
eraseFromModule(*F);
|
||||
}
|
||||
}
|
||||
|
||||
return PreservedAnalyses::none();
|
||||
|
||||
@@ -16,6 +16,16 @@ declare void @__hipstdpar_hidden_free(ptr)
|
||||
|
||||
declare ptr @__hipstdpar_hidden_malloc(i64)
|
||||
|
||||
declare ptr @__hipstdpar_hidden_memalign(i64, i64)
|
||||
|
||||
declare ptr @__hipstdpar_hidden_mmap(ptr, i64, i32, i32, i32, i64)
|
||||
|
||||
declare i32 @__hipstdpar_hidden_munmap(ptr, i64)
|
||||
|
||||
declare ptr @__hipstdpar_mmap(ptr, i64, i32, i32, i32, i64)
|
||||
|
||||
declare i32 @__hipstdpar_munmap(ptr, i64)
|
||||
|
||||
declare ptr @__hipstdpar_realloc(ptr, i64)
|
||||
|
||||
declare ptr @__hipstdpar_realloc_array(ptr, i64, i64)
|
||||
@@ -171,7 +181,21 @@ define dso_local noundef i32 @allocs() {
|
||||
; CHECK: call void @__hipstdpar_free(ptr noundef %28)
|
||||
call void @__libc_free(ptr noundef %28)
|
||||
|
||||
ret i32 0
|
||||
; CHECK: %29 = call ptr @__libc_malloc(i64 noundef 8)
|
||||
%29 = call ptr @__hipstdpar_hidden_malloc(i64 noundef 8)
|
||||
; CHECK: call void @__libc_free(ptr noundef %29)
|
||||
call void @__hipstdpar_hidden_free(ptr noundef %29)
|
||||
|
||||
; CHECK: %30 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 4)
|
||||
%30 = call ptr @__hipstdpar_hidden_memalign(i64 noundef 8, i64 noundef 4)
|
||||
; CHECK: %31 = call ptr @mmap(ptr %30, i64 8, i32 0, i32 0, i32 0, i64 0)
|
||||
%31 = call ptr @__hipstdpar_hidden_mmap(ptr %30, i64 8, i32 0, i32 0, i32 0, i64 0)
|
||||
; CHECK: %32 = call i32 @munmap(ptr %31, i64 8)
|
||||
%32 = call i32 @__hipstdpar_hidden_munmap(ptr %31, i64 8)
|
||||
; CHECK: call void @__libc_free(ptr noundef %30)
|
||||
call void @__hipstdpar_hidden_free(ptr noundef %30)
|
||||
|
||||
ret i32 %32
|
||||
}
|
||||
|
||||
declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)
|
||||
@@ -220,4 +244,8 @@ declare void @__libc_free(ptr noundef)
|
||||
|
||||
declare ptr @__libc_malloc(i64 noundef)
|
||||
|
||||
declare ptr @__libc_memalign(i64 noundef, i64 noundef)
|
||||
declare ptr @__libc_memalign(i64 noundef, i64 noundef)
|
||||
|
||||
declare ptr @mmap(ptr noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef)
|
||||
|
||||
declare i32 @munmap(ptr noundef, i64 noundef)
|
||||
|
||||
Reference in New Issue
Block a user