[Offload] Rework compiling device code for unit test suites (#144776)
Summary: I'll probably want to use this as a more generic utility in the future. This patch reworks it to make it a top level function. I also tried to decouple this from the OpenMP utilities to make that easier in the future. Instead, I just use `-march=native` functionality which is the same thing. Needed a small hack to skip the linker stage for checking if that works. This should still create the same output as far as I'm aware.
This commit is contained in:
@@ -1,6 +1,72 @@
|
||||
# Umbrella target for the Offload unit test suites, grouped under the
# "Tests/UnitTests" folder in IDE generators.
add_custom_target(OffloadUnitTests)
set_property(TARGET OffloadUnitTests PROPERTY FOLDER "Tests/UnitTests")
|
||||
|
||||
# add_offload_test_device_code(<test_filename> <test_name> [<compiler flags>...])
#
# Compiles the device source <test_filename> (relative to the current source
# directory) once per enabled GPU plugin and creates these targets:
#
#   <test_name>.nvptx64 -> ${CMAKE_CURRENT_BINARY_DIR}/<test_name>.nvptx64.bin
#   <test_name>.amdgpu  -> ${CMAKE_CURRENT_BINARY_DIR}/<test_name>.amdgpu.bin
#   <test_name>.bin     -> always created; depends on whichever of the two
#                          targets above exist.
#
# Any arguments after <test_name> are forwarded verbatim to the compiler.
#
# The architecture is taken from OFFLOAD_TESTS_FORCE_NVPTX_ARCH /
# OFFLOAD_TESTS_FORCE_AMDGPU_ARCH when set; otherwise "native" is used if the
# compiler accepts it for that target.
function(add_offload_test_device_code test_filename test_name)
  # Harmless if already included by a parent listfile; makes the function
  # usable on its own.
  include(CheckCXXCompilerFlag)

  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})

  # Make the flag checks below build a static library so that they do not
  # require a working device link step.
  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

  # Try to build with support for NVPTX devices.
  if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    find_package(CUDAToolkit QUIET)
    if(CUDAToolkit_FOUND)
      # The CUDA installation root is the parent of the toolkit's bin directory.
      get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
    endif()
    check_cxx_compiler_flag(
      "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}"
      PLATFORM_HAS_NVPTX)

    if(OFFLOAD_TESTS_FORCE_NVPTX_ARCH)
      set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVPTX_ARCH}")
    elseif(PLATFORM_HAS_NVPTX)
      set(nvptx_arch "native")
    endif()

    if(nvptx_arch AND CUDAToolkit_FOUND)
      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
      # Use the same CUDA root that was probed above. The previous
      # ${CUDA_ROOT} is not defined in this function and expanded to an empty
      # --cuda-path= argument.
      add_custom_command(
        OUTPUT ${output_file}
        COMMAND ${CMAKE_C_COMPILER}
                --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
                -nogpulib --cuda-path=${cuda_path} -flto ${ARGN}
                -c ${SRC_PATH} -o ${output_file}
        DEPENDS ${SRC_PATH}
        VERBATIM
      )
      add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
    endif()
  endif()

  # Try to build with support for AMDGPU devices.
  if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    check_cxx_compiler_flag(
      "--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)

    if(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
    elseif(PLATFORM_HAS_AMDGPU)
      set(amdgpu_arch "native")
    endif()

    if(amdgpu_arch)
      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
      add_custom_command(
        OUTPUT ${output_file}
        COMMAND ${CMAKE_C_COMPILER}
                --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
                -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
        DEPENDS ${SRC_PATH}
        VERBATIM
      )
      add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
    endif()
  endif()

  # Create a single dependency target for the device code.
  add_custom_target(${test_name}.bin)
  if(TARGET ${test_name}.amdgpu)
    add_dependencies(${test_name}.bin ${test_name}.amdgpu)
  endif()
  if(TARGET ${test_name}.nvptx64)
    add_dependencies(${test_name}.bin ${test_name}.nvptx64)
  endif()
endfunction()
|
||||
|
||||
function(add_offload_unittest test_dirname)
|
||||
set(target_name "${test_dirname}.unittests")
|
||||
|
||||
@@ -9,10 +75,15 @@ function(add_offload_unittest test_dirname)
|
||||
add_unittest(OffloadUnitTests "${target_name}"
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
|
||||
${files})
|
||||
add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
|
||||
add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
|
||||
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
|
||||
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
|
||||
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
|
||||
endfunction()
|
||||
|
||||
# Cache overrides for the device architecture used when compiling unit test
# device code. Both default to the empty string.
set(OFFLOAD_TESTS_FORCE_NVPTX_ARCH
    ""
    CACHE STRING
    "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH
    ""
    CACHE STRING
    "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")

add_subdirectory(OffloadAPI)
|
||||
|
||||
@@ -1,72 +1,7 @@
|
||||
# add_offload_test_device_code(<test_filename> <test_name> [<compiler flags>...])
#
# Adds build rules that compile <test_filename> (relative to the current
# source directory) into <test_name>.nvptx64.bin and/or <test_name>.amdgpu.bin
# in the current binary directory, depending on OFFLOAD_TEST_TARGET_NVIDIA and
# OFFLOAD_TEST_TARGET_AMDGPU. Extra arguments are passed on to the compiler.
#
# This is a macro, so SRC_PATH, BIN_PATH and the BIN_PATHS list it appends to
# are set in the caller's scope.
macro(add_offload_test_device_code test_filename test_name)
  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})

  # Build for NVPTX
  if(OFFLOAD_TEST_TARGET_NVIDIA)
    set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
    add_custom_command(
      OUTPUT ${BIN_PATH}
      COMMAND ${CMAKE_C_COMPILER}
              --target=nvptx64-nvidia-cuda
              ${ARGN}
              -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
              --cuda-path=${CUDA_ROOT}
              ${SRC_PATH}
              -o ${BIN_PATH}
      DEPENDS ${SRC_PATH}
    )
    list(APPEND BIN_PATHS ${BIN_PATH})
  endif()

  # Build for AMDGPU
  if(OFFLOAD_TEST_TARGET_AMDGPU)
    set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
    add_custom_command(
      OUTPUT ${BIN_PATH}
      COMMAND ${CMAKE_C_COMPILER}
              --target=amdgcn-amd-amdhsa
              -nogpulib
              ${ARGN}
              -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
              ${SRC_PATH}
              -o ${BIN_PATH}
      DEPENDS ${SRC_PATH}
    )
    list(APPEND BIN_PATHS ${BIN_PATH})
  endif()

  # TODO: Build for host CPU
endmacro()
|
||||
|
||||
|
||||
# Select the device targets to build test code for.
# LibomptargetGetDependencies is included at the top level, so the GPUs
# present on the system have already been detected by the time this runs.
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH
    ""
    CACHE STRING
    "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH
    ""
    CACHE STRING
    "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")

# CUDA_ROOT is the parent directory of the CUDA toolkit's bin directory.
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
  get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
endif()

# NVIDIA: a forced arch always enables the target; otherwise a detected GPU,
# a CUDA root and the "cuda" plugin are all required.
if(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
  set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
  set(OFFLOAD_TEST_TARGET_NVIDIA ON)
elseif(LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
  set(OFFLOAD_TEST_TARGET_NVIDIA ON)
endif()

# AMDGPU: a forced arch always enables the target; otherwise use the first
# detected arch when a GPU was found and the "amdgpu" plugin is being built.
if(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
  set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
  set(OFFLOAD_TEST_TARGET_AMDGPU ON)
elseif(LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
  list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
  set(OFFLOAD_TEST_TARGET_AMDGPU ON)
endif()
|
||||
|
||||
# Device code used by the unit tests.
add_offload_test_device_code(foo.c foo)
add_offload_test_device_code(bar.c bar)

# By default, amdhsa adds a number of "hidden" arguments to the kernel
# definition. Compiling with -O3 eliminates these implicit arguments, so
# liboffload sees a kernel function that takes no arguments.
add_offload_test_device_code(noargs.c noargs -O3)

# Aggregate targets for the device binaries.
add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
add_custom_target(offload_device_binaries DEPENDS foo.bin bar.bin noargs.bin)

# Export the directory holding the device binaries to the parent listfile.
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
|
||||
|
||||
Reference in New Issue
Block a user