From 3f1de197b1c339b311329c02bb739860b32c073f Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Fri, 20 Jun 2025 10:31:54 -0500
Subject: [PATCH] [Offload] Rework compiling device code for unit test suites (#144776)

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use `-march=native` functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.
---
 offload/unittests/CMakeLists.txt              | 84 ++++++++++++++++++-
 .../OffloadAPI/device_code/CMakeLists.txt     | 69 +----------------
 2 files changed, 85 insertions(+), 68 deletions(-)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d804..7cd41e1dcdaf 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,83 @@
 add_custom_target(OffloadUnitTests)
 set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
 
+# Compile one device-code source file for every GPU target that can be built.
+#
+#   test_filename  Source file, relative to the current source directory.
+#   test_name      Base name used for the output files and generated targets.
+#   ARGN           Extra compiler flags appended to each device compile command.
+#
+# Outputs are written to the current binary directory as
+# ${test_name}.nvptx64.bin and ${test_name}.amdgpu.bin. The ${test_name}.bin
+# target is always created and depends on whichever per-target builds exist.
+function(add_offload_test_device_code test_filename test_name)
+  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+  # Have the compiler-flag probes below build a static library so that they do
+  # not need a working device link step to succeed.
+  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+  # Try to build with support for NVPTX devices.
+  if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    find_package(CUDAToolkit QUIET)
+    if(CUDAToolkit_FOUND)
+      get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+    endif()
+    check_cxx_compiler_flag(
+      "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+    if(OFFLOAD_TESTS_FORCE_NVPTX_ARCH)
+      set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVPTX_ARCH}")
+    elseif(PLATFORM_HAS_NVPTX)
+      set(nvptx_arch "native")
+    endif()
+
+    if(nvptx_arch AND CUDAToolkit_FOUND)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+        -nogpulib --cuda-path=${cuda_path} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Try to build with support for AMDGPU devices.
+  if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+    if(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+    elseif(PLATFORM_HAS_AMDGPU)
+      set(amdgpu_arch "native")
+    endif()
+
+    if(amdgpu_arch)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+        -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Create a single dependency target for the device code.
+  add_custom_target(${test_name}.bin)
+  if(TARGET ${test_name}.amdgpu)
+    add_dependencies(${test_name}.bin ${test_name}.amdgpu)
+  endif()
+  if(TARGET ${test_name}.nvptx64)
+    add_dependencies(${test_name}.bin ${test_name}.nvptx64)
+  endif()
+endfunction()
+
 function(add_offload_unittest test_dirname)
   set(target_name "${test_dirname}.unittests")
 
@@ -9,10 +86,15 @@ function(add_offload_unittest test_dirname)
   add_unittest(OffloadUnitTests "${target_name}"
     ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
     ${files})
-  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
   target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
   target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
   target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
 endfunction()
 
+set(OFFLOAD_TESTS_FORCE_NVPTX_ARCH "" CACHE STRING
+  "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+  "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
 add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6..132c7a7c51fb 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,7 @@
-macro(add_offload_test_device_code test_filename test_name)
-    set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
-    # Build for NVPTX
-    if(OFFLOAD_TEST_TARGET_NVIDIA)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
-            ${ARGN}
-            -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
-            --cuda-path=${CUDA_ROOT}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # Build for AMDGPU
-    if(OFFLOAD_TEST_TARGET_AMDGPU)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
-            ${ARGN}
-            -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
-  "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
-  "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
-  get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
-  set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
-  set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-  set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
-  set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
-  set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-  list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
-  set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
 
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
+# Compile with optimizations to eliminate AMDGPU implicit arguments.
 add_offload_test_device_code(noargs.c noargs -O3)
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo.bin bar.bin noargs.bin)
 set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)