[OpenMP] Add memory allocation using hwloc (#132843)

This patch adds support for memory allocation using hwloc. To enable it,
set the environment variable KMP_TOPOLOGY_METHOD=hwloc. If hwloc is not
supported or not available, allocation falls back to the default path.
Author: nawrinsu
Date: 2025-04-02 00:17:50 -07:00
Committed by: GitHub
Parent: 536fe74aaa
Commit: 730e8a4a59
6 changed files with 346 additions and 111 deletions
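
For context, a minimal usage sketch of the new path (not part of the patch): it assumes a hwloc-enabled libomp build and uses only the standard OpenMP allocator API; whether the high-bandwidth request is actually honored depends on the hwloc support detected at runtime.

// Build against a hwloc-enabled libomp, then run with:
//   KMP_TOPOLOGY_METHOD=hwloc ./a.out
#include <omp.h>
#include <stdio.h>

int main(void) {
  // omp_high_bw_mem_alloc is routed through hwloc's memory attributes
  // when KMP_TOPOLOGY_METHOD=hwloc; otherwise the default path is used.
  void *p = omp_alloc(1 << 20, omp_high_bw_mem_alloc);
  printf("high-bw alloc: %p\n", p);
  omp_free(p, omp_high_bw_mem_alloc);
  return 0;
}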

View File

@@ -1107,6 +1107,7 @@ extern omp_allocator_handle_t __kmp_def_allocator;
#endif
extern int __kmp_memkind_available;
extern bool __kmp_hwloc_available;
typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
@@ -1119,6 +1120,9 @@ typedef struct kmp_allocator_t {
kmp_uint64 pool_size;
kmp_uint64 pool_used;
bool pinned;
#if KMP_USE_HWLOC
omp_alloctrait_value_t membind;
#endif
} kmp_allocator_t;
extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,

View File

@@ -1444,6 +1444,7 @@ void KMPAffinity::pick_api() {
if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
__kmp_affinity.type != affinity_disabled) {
affinity_dispatch = new KMPHwlocAffinity();
__kmp_hwloc_available = true;
} else
#endif
{

View File

@@ -14,6 +14,20 @@
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"
#if KMP_USE_HWLOC
#if HWLOC_API_VERSION > 0x00020300
#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
#elif HWLOC_API_VERSION == 0x00020300
#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
#else
enum hwloc_memattr_id_e {
HWLOC_MEMATTR_ID_BANDWIDTH,
HWLOC_MEMATTR_ID_CAPACITY
};
#endif
#endif // KMP_USE_HWLOC
// Disable bget when it is not used
#if KMP_USE_BGET
@@ -1356,6 +1370,74 @@ void __kmp_fini_memkind() {
#endif
}
#if KMP_USE_HWLOC
static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
const hwloc_topology_support *support;
support = hwloc_topology_get_support(__kmp_hwloc_topology);
if (support) {
if (policy == HWLOC_MEMBIND_BIND)
return (support->membind->alloc_membind &&
support->membind->bind_membind);
if (policy == HWLOC_MEMBIND_INTERLEAVE)
return (support->membind->alloc_membind &&
support->membind->interleave_membind);
}
return false;
#else
return false;
#endif
}
void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
void *ptr = NULL;
hwloc_obj_t node;
struct hwloc_location initiator;
int ret;
// TODO: We should make this more efficient by getting rid of the OS syscall
// 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity and instead
// use th_affin_mask field when it's capable of getting the underlying
// mask implementation.
hwloc_cpuset_t mask = hwloc_bitmap_alloc();
ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
if (ret < 0) {
hwloc_bitmap_free(mask);
return ptr;
}
initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
initiator.location.cpuset = mask;
ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
&node, NULL);
if (ret < 0) {
return ptr;
}
return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
HWLOC_MEMBIND_BYNODESET);
#else
return NULL;
#endif
}
void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
void *ptr = NULL;
if (ms == omp_high_bw_mem_space) {
ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
} else if (ms == omp_large_cap_mem_space) {
ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
} else {
ptr = hwloc_alloc(__kmp_hwloc_topology, size);
}
return ptr;
#else
return NULL;
#endif
}
#endif // KMP_USE_HWLOC
void __kmp_init_target_mem() {
*(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
*(void **)(&kmp_target_alloc_shared) =
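
For reference, a standalone sketch (not part of the patch) of the hwloc memory-attribute lookup that __kmp_hwloc_alloc_membind performs above: take the calling thread's cpuset as the initiator, ask hwloc for the best NUMA node by a given attribute, then allocate bound to that node's nodeset. Assumes hwloc >= 2.3; error handling is abbreviated.

#include <hwloc.h>
#include <stdio.h>

int main(void) {
  hwloc_topology_t topo;
  hwloc_topology_init(&topo);
  hwloc_topology_load(topo);

  // Use the calling thread's current cpuset as the initiator.
  hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
  hwloc_get_cpubind(topo, cpuset, HWLOC_CPUBIND_THREAD);

  struct hwloc_location initiator;
  initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
  initiator.location.cpuset = cpuset;

  // Ask for the NUMA node with the best bandwidth from this initiator.
  hwloc_obj_t node = NULL;
  if (hwloc_memattr_get_best_target(topo, HWLOC_MEMATTR_ID_BANDWIDTH,
                                    &initiator, 0, &node, NULL) == 0) {
    void *p = hwloc_alloc_membind(topo, 1 << 20, node->nodeset,
                                  HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
    printf("bound 1 MiB to node %u: %p\n", node->os_index, p);
    if (p)
      hwloc_free(topo, p, 1 << 20);
  }
  hwloc_bitmap_free(cpuset);
  hwloc_topology_destroy(topo);
  return 0;
}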
@@ -1412,6 +1494,13 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
break;
case omp_atk_partition:
#if KMP_USE_HWLOC
al->membind = (omp_alloctrait_value_t)traits[i].value;
KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
al->membind == omp_atv_nearest ||
al->membind == omp_atv_blocked ||
al->membind == omp_atv_interleaved);
#endif
al->memkind = RCAST(void **, traits[i].value);
break;
default:
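
The new membind field is populated from the omp_atk_partition trait handled above. A minimal sketch (not part of the patch) of an allocator requesting interleaved placement via the standard OpenMP trait API; whether the request is honored depends on the hwloc membind support checked later in __kmp_alloc.

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Request interleaved placement through the partition trait.
  omp_alloctrait_t traits[1] = {{omp_atk_partition, omp_atv_interleaved}};
  omp_allocator_handle_t a =
      omp_init_allocator(omp_default_mem_space, 1, traits);
  void *p = omp_alloc(1 << 20, a);
  printf("interleaved alloc: %p\n", p);
  omp_free(p, a);
  omp_destroy_allocator(a);
  return 0;
}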
@@ -1466,7 +1555,8 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
__kmp_free(al);
return omp_null_allocator;
} else {
if (ms == omp_high_bw_mem_space) {
if (!__kmp_hwloc_available &&
(ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
// cannot detect HBW memory presence without memkind library
__kmp_free(al);
return omp_null_allocator;
@@ -1573,8 +1663,9 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
if (allocator > kmp_max_mem_alloc)
is_pinned = al->pinned;
// Use default allocator if libmemkind is not available
int use_default_allocator = (__kmp_memkind_available) ? false : true;
// Use default allocator if hwloc and libmemkind are not available
int use_default_allocator =
(!__kmp_hwloc_available && !__kmp_memkind_available);
if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
// Use size input directly as the memory may not be accessible on host.
@@ -1610,38 +1701,152 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
}
}
if (__kmp_memkind_available) {
if (allocator < kmp_max_mem_alloc) {
// pre-defined allocator
if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
} else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
#if KMP_USE_HWLOC
if (__kmp_hwloc_available) {
if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
if (allocator < kmp_max_mem_alloc) {
// pre-defined allocator
if (allocator == omp_high_bw_mem_alloc) {
ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
desc.size_a, HWLOC_MEMBIND_BIND);
if (ptr == NULL)
use_default_allocator = true;
} else if (allocator == omp_large_cap_mem_alloc) {
ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
desc.size_a, HWLOC_MEMBIND_BIND);
if (ptr == NULL)
use_default_allocator = true;
} else {
use_default_allocator = true;
}
if (use_default_allocator) {
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
}
} else if (al->pool_size > 0) {
// custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
} // else ptr == NULL;
} else {
// pool has enough space
if (al->membind == omp_atv_interleaved) {
if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
HWLOC_MEMBIND_INTERLEAVE);
}
} else if (al->membind == omp_atv_environment) {
ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
HWLOC_MEMBIND_DEFAULT);
} else {
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
}
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
}
}
}
} else {
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
// custom allocator, pool size not requested
if (al->membind == omp_atv_interleaved) {
if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
HWLOC_MEMBIND_INTERLEAVE);
}
} else if (al->membind == omp_atv_environment) {
ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
HWLOC_MEMBIND_DEFAULT);
} else {
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
}
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
}
}
}
} else if (al->pool_size > 0) {
// custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
} else { // alloc membind not supported, use hwloc_alloc
ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
}
} else {
#endif
if (__kmp_memkind_available) {
if (allocator < kmp_max_mem_alloc) {
// pre-defined allocator
if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
} else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
} else {
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
} // else ptr == NULL;
}
} else if (al->pool_size > 0) {
// custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
} // else ptr == NULL;
} else {
// pool has enough space
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
}
}
}
} else {
// pool has enough space
// custom allocator, pool size not requested
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
@@ -1659,13 +1864,39 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
}
}
}
} else {
// custom allocator, pool size not requested
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
} else if (allocator < kmp_max_mem_alloc) {
// pre-defined allocator
if (allocator == omp_high_bw_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
} else if (allocator == omp_large_cap_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
} else if (allocator == omp_const_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
} else if (allocator == omp_low_lat_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
} else if (allocator == omp_cgroup_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
} else if (allocator == omp_pteam_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
} else if (allocator == omp_thread_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
} else { // default allocator requested
use_default_allocator = true;
}
if (use_default_allocator) {
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
use_default_allocator = false;
}
} else if (al->pool_size > 0) {
// custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
@@ -1675,66 +1906,25 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
}
}
}
} else if (allocator < kmp_max_mem_alloc) {
// pre-defined allocator
if (allocator == omp_high_bw_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
} else if (allocator == omp_large_cap_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
} else if (allocator == omp_const_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
} else if (allocator == omp_low_lat_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
} else if (allocator == omp_cgroup_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
} else if (allocator == omp_pteam_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
} else if (allocator == omp_thread_mem_alloc) {
KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
} else { // default allocator requested
use_default_allocator = true;
}
if (use_default_allocator) {
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
use_default_allocator = false;
}
} else if (al->pool_size > 0) {
// custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
} // else ptr == NULL
} else {
// pool has enough space
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
} // else ptr == NULL;
if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal
// alloc
}
} else {
// pool has enough space
// custom allocator, pool size not requested
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal alloc
}
} else {
// custom allocator, pool size not requested
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal alloc
#if KMP_USE_HWLOC
}
#endif
KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
if (ptr == NULL)
return NULL;
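
Much of the new branching above implements the fallback trait when the hwloc path cannot satisfy a request (pool exhausted or allocation failure). A minimal sketch (not part of the patch) of a pooled allocator with an allocator fallback, using standard OpenMP traits; sizes and trait choices are illustrative.

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Fall back to the default allocator when the 1 MiB pool is exhausted.
  omp_alloctrait_t traits[3] = {
      {omp_atk_pool_size, 1 << 20},
      {omp_atk_fallback, omp_atv_allocator_fb},
      {omp_atk_fb_data, (omp_uintptr_t)omp_default_mem_alloc}};
  omp_allocator_handle_t a =
      omp_init_allocator(omp_default_mem_space, 3, traits);
  void *small = omp_alloc(512 * 1024, a);      // served from the pool
  void *large = omp_alloc(4 * 1024 * 1024, a); // exceeds pool, uses fallback
  printf("pool alloc %p, fallback alloc %p\n", small, large);
  omp_free(small, a);
  omp_free(large, a);
  omp_destroy_allocator(a);
  return 0;
}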
@@ -1864,34 +2054,48 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
kmp_target_unlock_mem(desc.ptr_alloc, device);
}
if (__kmp_memkind_available) {
if (oal < kmp_max_mem_alloc) {
// pre-defined allocator
if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
} else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
} else {
kmp_mk_free(*mk_default, desc.ptr_alloc);
}
} else {
if (al->pool_size > 0) { // custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used; // to suppress compiler warning
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
kmp_mk_free(*al->memkind, desc.ptr_alloc);
}
} else {
#if KMP_USE_HWLOC
if (__kmp_hwloc_available) {
if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used; // to suppress compiler warning
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
__kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
} else {
#endif
if (__kmp_memkind_available) {
if (oal < kmp_max_mem_alloc) {
// pre-defined allocator
if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
} else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
} else {
kmp_mk_free(*mk_default, desc.ptr_alloc);
}
} else {
if (al->pool_size > 0) { // custom allocator with pool size requested
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used; // to suppress compiler warning
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
kmp_mk_free(*al->memkind, desc.ptr_alloc);
}
} else {
if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used; // to suppress compiler warning
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
__kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
}
#if KMP_USE_HWLOC
}
#endif
}
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes

View File

@@ -296,6 +296,7 @@ kmp_int32 __kmp_max_task_priority = 0;
kmp_uint64 __kmp_taskloop_min_tasks = 0;
int __kmp_memkind_available = 0;
bool __kmp_hwloc_available = false;
omp_allocator_handle_t const omp_null_allocator = NULL;
omp_allocator_handle_t const omp_default_mem_alloc =
(omp_allocator_handle_t const)1;

View File

@@ -3767,7 +3767,7 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
if (__kmp_match_str("omp_high_bw_mem_alloc", scan, &next)) {
SKIP_WS(next);
if (is_memalloc) {
if (__kmp_memkind_available) {
if (__kmp_hwloc_available || __kmp_memkind_available) {
__kmp_def_allocator = omp_high_bw_mem_alloc;
return;
} else {
@@ -3780,7 +3780,7 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
} else if (__kmp_match_str("omp_large_cap_mem_alloc", scan, &next)) {
SKIP_WS(next);
if (is_memalloc) {
if (__kmp_memkind_available) {
if (__kmp_hwloc_available || __kmp_memkind_available) {
__kmp_def_allocator = omp_large_cap_mem_alloc;
return;
} else {
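
With this change, OMP_ALLOCATOR=omp_high_bw_mem_alloc or omp_large_cap_mem_alloc is accepted as the default allocator when hwloc is available, even without memkind. A minimal sketch (not part of the patch); the run command in the comment is an example, not a requirement.

// Run, for example, with:
//   KMP_TOPOLOGY_METHOD=hwloc OMP_ALLOCATOR=omp_high_bw_mem_alloc ./a.out
#include <omp.h>
#include <stdio.h>

int main(void) {
  // omp_null_allocator means "use the default allocator", which now
  // reflects OMP_ALLOCATOR instead of being reset when memkind is missing.
  void *p = omp_alloc(1024, omp_null_allocator);
  printf("default-allocator alloc: %p\n", p);
  omp_free(p, omp_null_allocator);
  return 0;
}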

View File

@@ -0,0 +1,25 @@
// RUN: %libomp-compile && env KMP_TOPOLOGY_METHOD=hwloc %libomp-run
// REQUIRES: hwloc
#include <stdio.h>
#include <omp.h>
int main() {
void *p[2];
#pragma omp parallel num_threads(2)
{
int i = omp_get_thread_num();
p[i] = omp_alloc(1024 * 1024, omp_get_default_allocator());
#pragma omp barrier
printf("th %d, ptr %p\n", i, p[i]);
omp_free(p[i], omp_get_default_allocator());
}
// Both pointers should be non-NULL
if (p[0] != NULL && p[1] != NULL) {
printf("passed\n");
return 0;
} else {
printf("failed: pointers %p %p\n", p[0], p[1]);
return 1;
}
}