From 9d8a30c929529097940c0efe1899b9db0babab1a Mon Sep 17 00:00:00 2001 From: Spaarsh Date: Sun, 5 Oct 2025 22:59:22 +0530 Subject: [PATCH 1/2] ROCm Compile Time Reduction --- CMakeLists.txt | 106 +++++++-- src/CMakeLists.txt | 362 ++++++++++++++++++++---------- src/acc/libsmm_acc/CMakeLists.txt | 107 ++++++--- 3 files changed, 407 insertions(+), 168 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 601570c24ab..551cd84ff94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,40 @@ set(USE_ACCEL CACHE STRING "Build with acceleration support (default: none)") set_property(CACHE USE_ACCEL PROPERTY STRINGS "" opencl cuda hip) +# Add new multi-architecture option +option(MULTI_ARCH "Build for multiple GPU architectures" OFF) + +# Multi-architecture support (only when enabled) +if(MULTI_ARCH AND MULTI_GPU_BUILD) + set(WITH_GPU_LIST "P100" CACHE STRING "List of GPU architectures to build for (semicolon-separated)") + + # Define supported architectures for the property + set(SUPPORTED_CUDA_ARCHITECTURES + K20X + K40 + K80 + P100 + V100 + A100 + H100) + set(SUPPORTED_HIP_ARCHITECTURES Mi50 Mi100 Mi250 Mi300) + + set_property(CACHE WITH_GPU_LIST PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} ${SUPPORTED_HIP_ARCHITECTURES}) + + # Parse the list + string(REPLACE ";" " " WITH_GPU_LIST_STR "${WITH_GPU_LIST}") + list(LENGTH WITH_GPU_LIST GPU_COUNT) + + if(GPU_COUNT GREATER 1) + set(MULTI_GPU_BUILD ON) + message(STATUS "Multi-GPU build enabled for: ${WITH_GPU_LIST_STR}") + else() + set(MULTI_GPU_BUILD OFF) + list(GET WITH_GPU_LIST 0 WITH_GPU) + message(STATUS "Single GPU build for: ${WITH_GPU}") + endif() +endif() + set(SUPPORTED_CUDA_ARCHITECTURES K20X K40 @@ -110,16 +144,22 @@ set(SUPPORTED_CUDA_ARCHITECTURES A100 H100) set(SUPPORTED_HIP_ARCHITECTURES Mi50 Mi100 Mi250 Mi300) -set(WITH_GPU - $,"","P100"> - CACHE - STRING - "Select GPU arch. 
and embed parameters (default: CUDA/HIP=P100, OPENCL=all)" -) -set(WITH_GPU_PARAMS "${WITH_GPU}") -set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} - ${SUPPORTED_HIP_ARCHITECTURES}) +if(NOT MULTI_ARCH) + set(WITH_GPU + $,"","P100"> + CACHE + STRING + "Select GPU arch. and embed parameters (default: CUDA/HIP=P100, OPENCL=all)" + ) + set(WITH_GPU_PARAMS "${WITH_GPU}") + set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} + ${SUPPORTED_HIP_ARCHITECTURES}) +else() + # For multi-arch builds, set WITH_GPU_PARAMS to first architecture for compatibility + list(GET WITH_GPU_LIST 0 WITH_GPU_PARAMS) + list(GET WITH_GPU_LIST 0 WITH_GPU) # Set WITH_GPU for compatibility with existing code +endif() option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF) option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF) @@ -298,13 +338,41 @@ if (USE_ACCEL MATCHES "hip") endif () enable_language(HIP) - # Make sure the GPU required is supported - list(FIND SUPPORTED_HIP_ARCHITECTURES ${WITH_GPU} GPU_SUPPORTED) - if (GPU_SUPPORTED EQUAL -1) - message( - FATAL_ERROR "GPU architecture requested (${WITH_GPU}) is not supported. " - "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") - endif () + if(MULTI_ARCH AND MULTI_GPU_BUILD) + # Validate all GPU architectures in the list + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + list(FIND SUPPORTED_HIP_ARCHITECTURES ${GPU_ARCH} GPU_SUPPORTED) + if (GPU_SUPPORTED EQUAL -1) + message( + FATAL_ERROR "GPU architecture requested (${GPU_ARCH}) is not supported. 
" + "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") + endif () + endforeach() + + set(ACC_ARCH_NUMBERS "") + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + list(APPEND ACC_ARCH_NUMBERS ${GPU_ARCH_NUMBER_${GPU_ARCH}}) + endforeach() + + message(STATUS "Multi-GPU HIP build for architectures: ${WITH_GPU_LIST}") + message(STATUS "HIP architecture numbers: ${ACC_ARCH_NUMBERS}") + message(STATUS "Kernel parameters will be generated for: ${WITH_GPU_LIST}") + + else() + # Make sure the GPU required is supported + list(FIND SUPPORTED_HIP_ARCHITECTURES ${WITH_GPU} GPU_SUPPORTED) + if (GPU_SUPPORTED EQUAL -1) + message( + FATAL_ERROR "GPU architecture requested (${WITH_GPU}) is not supported. " + "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") + endif () + + set(ACC_ARCH_NUMBER ${GPU_ARCH_NUMBER_${WITH_GPU}}) + message(STATUS "GPU target architecture: " ${WITH_GPU}) + message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS}) + message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER}) + message(STATUS "GPU profiling enabled: " ${WITH_HIP_PROFILING}) + endif() # ROCm is typically installed in /opt/rocm; otherwise let the user set # ROCM_PATH as an environment variable or define. 
@@ -329,12 +397,6 @@ if (USE_ACCEL MATCHES "hip") message(FATAL_ERROR "HIP version >= 4.4.0 is required.") endif () - set(ACC_ARCH_NUMBER ${GPU_ARCH_NUMBER_${WITH_GPU}}) - message(STATUS "GPU target architecture: " ${WITH_GPU}) - message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS}) - message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER}) - message(STATUS "GPU profiling enabled: " ${WITH_HIP_PROFILING}) - # =================================== BLAS on GPU backend find_package(hipblas CONFIG REQUIRED HINTS ${ROCM_PATH}) endif () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4602195f8d9..b1f509f2b59 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -105,11 +105,7 @@ add_fypp_sources( utils/dbcsr_toollib.F work/dbcsr_work_operations.F) -set(DBCSR_HIP_AND_CUDA_SRCS - acc/libsmm_acc/libsmm_acc_benchmark.cpp - acc/libsmm_acc/libsmm_acc_init.cpp - acc/libsmm_acc/libsmm_acc.cpp - acc/cuda_hip/calculate_norms.cpp +set(DBCSR_HIP_AND_CUDA_COMMON_SRCS acc/cuda_hip/acc_blas.cpp acc/cuda_hip/acc_dev.cpp acc/cuda_hip/acc_error.cpp @@ -119,10 +115,17 @@ set(DBCSR_HIP_AND_CUDA_SRCS acc/cuda_hip/acc_mem.cpp acc/cuda_hip/acc_stream.cpp) -set(DBCSR_CUDA_SRCS ${DBCSR_HIP_AND_CUDA_SRCS} acc/cuda/acc_cuda.cpp +set(DBCSR_ARCH_DEPENDENT_SRCS + acc/cuda_hip/calculate_norms.cpp + acc/libsmm_acc/libsmm_acc_benchmark.cpp + acc/libsmm_acc/libsmm_acc_init.cpp + acc/libsmm_acc/libsmm_acc.cpp) + + +set(DBCSR_CUDA_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} ${DBCSR_ARCH_DEPENDENT_SRCS} acc/cuda/acc_cuda.cpp acc/cuda/dbcsr_cuda_nvtx_cu.cpp) -set(DBCSR_HIP_SRCS ${DBCSR_HIP_AND_CUDA_SRCS} acc/hip/acc_hip.cpp) +set(DBCSR_HIP_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} ${DBCSR_ARCH_DEPENDENT_SRCS} acc/hip/acc_hip.cpp) if (USE_ACCEL MATCHES "hip") set_source_files_properties(acc/cuda_hip/calculate_norms.cpp @@ -168,124 +171,230 @@ endif () # ================================================================================================= # DBCSR LIBRARY -add_library(dbcsr 
${DBCSR_SRCS}) - -# -fPIC can also be used in the static case. Addresses are resolved during the -# linking process -set_target_properties( - dbcsr - PROPERTIES VERSION ${dbcsr_VERSION} - SOVERSION ${dbcsr_APIVERSION} - POSITION_INDEPENDENT_CODE ON) +if(MULTI_ARCH AND MULTI_GPU_BUILD) + # Multi-architecture build: create separate libraries per GPU architecture + + # First, create a common library with all architecture-independent code + set(DBCSR_COMMON_SRCS ${DBCSR_FORTRAN_SRCS}) + if (USE_ACCEL MATCHES "cuda") + # Add only common CUDA sources (manually specify to exclude arch-specific ones) + list(APPEND DBCSR_COMMON_SRCS + ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} + acc/cuda/acc_cuda.cpp + acc/cuda/dbcsr_cuda_nvtx_cu.cpp) + # Note: calculate_norms.cpp and libsmm_acc files are intentionally excluded + elseif (USE_ACCEL MATCHES "hip") + # Add only common HIP sources (manually specify to exclude arch-specific ones) + list(APPEND DBCSR_COMMON_SRCS + ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} + acc/hip/acc_hip.cpp) + # Note: calculate_norms.cpp and libsmm_acc files are intentionally excluded + elseif (USE_ACCEL MATCHES "opencl") + list(APPEND DBCSR_COMMON_SRCS ${DBCSR_OPENCL_SRCS}) + endif() + + # Create common object library (compiled once, reused for all architectures) + add_library(dbcsr_common OBJECT ${DBCSR_COMMON_SRCS}) + + # Set common properties + set_target_properties( + dbcsr_common + PROPERTIES POSITION_INDEPENDENT_CODE ON) + + # Now create separate libraries for each GPU architecture + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + # Get architecture number for this GPU + set(ARCH_NUM ${GPU_ARCH_NUMBER_${GPU_ARCH}}) + + # Create architecture-specific object library for calculate_norms.cpp + add_library(dbcsr_arch_${GPU_ARCH} OBJECT) + + if (USE_ACCEL MATCHES "hip|cuda") + target_sources(dbcsr_arch_${GPU_ARCH} PRIVATE + ${DBCSR_ARCH_DEPENDENT_SRCS}) + + target_include_directories(dbcsr_arch_${GPU_ARCH} PRIVATE + ${CMAKE_CURRENT_BINARY_DIR}/acc/libsmm_acc + 
${CMAKE_CURRENT_SOURCE_DIR}/acc/libsmm_acc) + + if (USE_ACCEL MATCHES "hip") + set_source_files_properties(acc/cuda_hip/calculate_norms.cpp + PROPERTIES LANGUAGE HIP) + set_source_files_properties(acc/cuda_hip/calculate_norms.cpp + PROPERTIES COMPILE_FLAGS "-fPIE") + set_target_properties(dbcsr_arch_${GPU_ARCH} PROPERTIES HIP_ARCHITECTURES "${ARCH_NUM}") + elseif (USE_ACCEL MATCHES "cuda") + set_source_files_properties(acc/cuda_hip/calculate_norms.cpp + PROPERTIES LANGUAGE CUDA) + set_source_files_properties(acc/cuda_hip/calculate_norms.cpp + PROPERTIES COMPILE_FLAGS "--x cu") + set_target_properties(dbcsr_arch_${GPU_ARCH} PROPERTIES CUDA_ARCHITECTURES "${ARCH_NUM}") + endif() + + # Set architecture-specific compile definitions + target_compile_definitions(dbcsr_arch_${GPU_ARCH} PRIVATE + __DBCSR_ACC + $<$:__CUDA> + $<$:__HIP> + ARCH_NUMBER=${ARCH_NUM} + $<$:__CUDA_PROFILING> + $<$:__HIP_PROFILING>) + endif() + + # Create the final library combining common + arch-specific code + add_library(dbcsr_${GPU_ARCH} + $ + $) + + # Set library properties + set_target_properties( + dbcsr_${GPU_ARCH} + PROPERTIES VERSION ${dbcsr_VERSION} + SOVERSION ${dbcsr_APIVERSION} + POSITION_INDEPENDENT_CODE ON + OUTPUT_NAME "dbcsr_${GPU_ARCH}" + EXPORT_NAME "dbcsr_${GPU_ARCH}") + + set_target_properties(dbcsr_${GPU_ARCH} PROPERTIES LINKER_LANGUAGE Fortran) + + message(STATUS "Created library target: dbcsr_${GPU_ARCH} for architecture ${GPU_ARCH} (${ARCH_NUM})") + endforeach() + + # Create a convenience target that builds all GPU variants + add_custom_target(dbcsr_all_gpus) + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + add_dependencies(dbcsr_all_gpus dbcsr_${GPU_ARCH}) + endforeach() + + # Set the first GPU architecture as the default "dbcsr" target for compatibility + list(GET WITH_GPU_LIST 0 FIRST_GPU_ARCH) + add_library(dbcsr ALIAS dbcsr_${FIRST_GPU_ARCH}) + message(STATUS "Default 'dbcsr' target aliased to: dbcsr_${FIRST_GPU_ARCH}") + +else() + # Single architecture build (existing 
logic) + add_library(dbcsr ${DBCSR_SRCS}) + + # Set properties for single arch build + set_target_properties( + dbcsr + PROPERTIES VERSION ${dbcsr_VERSION} + SOVERSION ${dbcsr_APIVERSION} + POSITION_INDEPENDENT_CODE ON) -if (USE_ACCEL MATCHES "hip") - set_target_properties(dbcsr PROPERTIES HIP_ARCHITECTURES "${ACC_ARCH_NUMBER}") -elseif (USE_ACCEL MATCHES "cuda") - set_target_properties(dbcsr PROPERTIES CUDA_ARCHITECTURES - "${ACC_ARCH_NUMBER}") -endif () + if (USE_ACCEL MATCHES "hip") + set_target_properties(dbcsr PROPERTIES HIP_ARCHITECTURES "${ACC_ARCH_NUMBER}") + elseif (USE_ACCEL MATCHES "cuda") + set_target_properties(dbcsr PROPERTIES CUDA_ARCHITECTURES "${ACC_ARCH_NUMBER}") + endif() + + set_target_properties(dbcsr PROPERTIES LINKER_LANGUAGE Fortran) +endif() -if (USE_SMM MATCHES "libxsmm" OR (USE_SMM MATCHES "auto" AND LIBXSMM_FOUND)) - target_compile_definitions(dbcsr PRIVATE __LIBXSMM) - target_link_directories(dbcsr PUBLIC ${LIBXSMM_LIBRARY_DIRS}) - if (USE_OPENMP) - target_link_libraries(dbcsr PRIVATE PkgConfig::LIBXSMMEXT) +# ================================================================================================= +# APPLY COMMON SETTINGS TO ALL TARGETS + +# Function to apply common settings to a target +function(apply_common_dbcsr_settings target_name) + # Apply all the existing settings... 
+ if (USE_SMM MATCHES "libxsmm" OR (USE_SMM MATCHES "auto" AND LIBXSMM_FOUND)) + target_compile_definitions(${target_name} PRIVATE __LIBXSMM) + target_link_directories(${target_name} PUBLIC ${LIBXSMM_LIBRARY_DIRS}) + if (USE_OPENMP) + target_link_libraries(${target_name} PRIVATE PkgConfig::LIBXSMMEXT) + endif () + target_link_libraries(${target_name} PRIVATE PkgConfig::LIBXSMM) + target_link_libraries(${target_name} PRIVATE ${BLAS_LIBRARIES}) endif () - target_link_libraries(dbcsr PRIVATE PkgConfig::LIBXSMM) - target_link_libraries(dbcsr PRIVATE ${BLAS_LIBRARIES}) -endif () - -if (BLAS_LIBRARIES MATCHES "mkl_") - target_compile_definitions(dbcsr PRIVATE __MKL) -endif () - -if (APPLE) - # fix /proc/self/statm can not be opened on macOS - target_compile_definitions(dbcsr PRIVATE __NO_STATM_ACCESS) - if (BLAS_LIBRARIES MATCHES "Accelerate") - target_compile_definitions(dbcsr PRIVATE __ACCELERATE) + if (BLAS_LIBRARIES MATCHES "mkl_") + target_compile_definitions(${target_name} PRIVATE __MKL) endif () -endif () -# set -DNDEBUG for Release builds -target_compile_definitions(dbcsr PRIVATE $<$:NDEBUG>) - -target_link_libraries(dbcsr PRIVATE ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) -target_include_directories( - dbcsr PRIVATE base) # do not export those includes, but some srcs do an - # unprefixed include -# make sure dependencies of dbcsr find the dbcsr_api.mod file plus some files -# they usually include: -target_include_directories( - dbcsr - PUBLIC $ - $ - $) -target_compile_definitions(dbcsr PRIVATE __STATM_TOTAL) -set_target_properties(dbcsr PROPERTIES LINKER_LANGUAGE Fortran) - -if (MPI_FOUND) - # once built, a user of the dbcsr library can not influence anything anymore - # by setting those flags: - target_compile_definitions(dbcsr PRIVATE __parallel) - - # If requested, use the MPI_F08 module - if (USE_MPI_F08) - target_compile_definitions(dbcsr PRIVATE __USE_MPI_F08) + if (APPLE) + target_compile_definitions(${target_name} PRIVATE __NO_STATM_ACCESS) + if 
(BLAS_LIBRARIES MATCHES "Accelerate") + target_compile_definitions(${target_name} PRIVATE __ACCELERATE) + endif () endif () - # Instead of resetting the compiler for MPI, we are adding the compiler flags - # otherwise added by the mpifort-wrapper directly; based on hints from: - # https://cmake.org/pipermail/cmake/2012-June/050991.html Here we assume that - # the MPI implementation found uses the same compiler as the Fortran compiler - # we found prior. Otherwise we might be adding incompatible compiler flags at - # this point. when built against MPI, a dbcsr consumer has to specify the MPI - # flags as well, therefore: PUBLIC - target_link_libraries(dbcsr PUBLIC MPI::MPI_Fortran) -endif () - -target_link_libraries( - dbcsr - PRIVATE $<$:OpenMP::OpenMP_C> - $<$:OpenMP::OpenMP_CXX> - $<$:OpenMP::OpenMP_Fortran>) - -# todo, make this a bit better with opencl. -if (USE_ACCEL MATCHES "cuda|hip") - add_subdirectory(acc/libsmm_acc) -endif () - -if (USE_ACCEL MATCHES "opencl") - add_subdirectory(acc/opencl/smm) -endif () - -if (USE_ACCEL) - target_compile_definitions( - dbcsr - PRIVATE __DBCSR_ACC - $<$:__CUDA> - $<$:__OPENCL> - $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> - $<$:__HIP> - $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> - $<$:__CUDA_PROFILING> - $<$:__HIP_PROFILING>) + target_compile_definitions(${target_name} PRIVATE $<$:NDEBUG>) + target_link_libraries(${target_name} PRIVATE ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + target_include_directories(${target_name} PRIVATE base) + target_include_directories( + ${target_name} + PUBLIC $ + $ + $) + target_compile_definitions(${target_name} PRIVATE __STATM_TOTAL) + + if (MPI_FOUND) + target_compile_definitions(${target_name} PRIVATE __parallel) + if (USE_MPI_F08) + target_compile_definitions(${target_name} PRIVATE __USE_MPI_F08) + endif () + target_link_libraries(${target_name} PUBLIC MPI::MPI_Fortran) + endif () target_link_libraries( - dbcsr - PRIVATE $<$:CUDA::cudart> - $<$:CUDA::cuda_driver> - $<$:CUDA::cublas> - $<$:CUDA::nvrtc> - 
$<$:CUDA::nvToolsExt> - $<$:roc::hipblas> - $<$:hiprtc> - $<$:hip::host> - $<$:roctx64> - $<$:roctracer64> - $<$:OpenCL::OpenCL>) -endif () + ${target_name} + PRIVATE $<$:OpenMP::OpenMP_C> + $<$:OpenMP::OpenMP_CXX> + $<$:OpenMP::OpenMP_Fortran>) + + if (USE_ACCEL) + # Only a real multi-GPU build sets ARCH_NUMBER per dbcsr_arch_<GPU> target; MULTI_ARCH with a single GPU still builds the plain dbcsr target and needs it here + if(NOT (MULTI_ARCH AND MULTI_GPU_BUILD)) + target_compile_definitions( + ${target_name} + PRIVATE __DBCSR_ACC + $<$:__CUDA> + $<$:__OPENCL> + $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> + $<$:__HIP> + $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> + $<$:__CUDA_PROFILING> + $<$:__HIP_PROFILING>) + else() + # For multi-arch common target, set basic acceleration flags + target_compile_definitions( + ${target_name} + PRIVATE __DBCSR_ACC + $<$:__CUDA> + $<$:__OPENCL> + $<$:__HIP> + $<$:__CUDA_PROFILING> + $<$:__HIP_PROFILING>) + endif() + + target_link_libraries( + ${target_name} + PRIVATE $<$:CUDA::cudart> + $<$:CUDA::cuda_driver> + $<$:CUDA::cublas> + $<$:CUDA::nvrtc> + $<$:CUDA::nvToolsExt> + $<$:roc::hipblas> + $<$:hiprtc> + $<$:hip::host> + $<$:roctx64> + $<$:roctracer64> + $<$:OpenCL::OpenCL>) + endif () +endfunction() + +# Apply settings to all targets +if(MULTI_ARCH AND MULTI_GPU_BUILD) + # Apply to common target + apply_common_dbcsr_settings(dbcsr_common) + # Apply to all GPU-specific targets + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + apply_common_dbcsr_settings(dbcsr_${GPU_ARCH}) + endforeach() +else() + # Apply to single target + apply_common_dbcsr_settings(dbcsr) +endif() + +# acc/libsmm_acc generates parameters.h / smm_acc_kernels.h and attaches +# dependencies and include paths to the dbcsr* targets, so the accelerator +# sub-directories must be added after those targets exist. +if (USE_ACCEL MATCHES "cuda|hip") + add_subdirectory(acc/libsmm_acc) +endif () + +if (USE_ACCEL MATCHES "opencl") + add_subdirectory(acc/opencl/smm) +endif () # ================================================================================================= # DBCSR's C API @@ -322,11 +431,24 @@ set(config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") set(config_namespace "DBCSR::") # Install targets -install( - TARGETS dbcsr - EXPORT DBCSRTargets - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +if(MULTI_ARCH AND MULTI_GPU_BUILD) + # Install all GPU-specific libraries 
+ foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + install( + TARGETS dbcsr_${GPU_ARCH} + EXPORT DBCSRTargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") + endforeach() + message(STATUS "Multi-arch install: Installing libraries for all GPU architectures: ${WITH_GPU_LIST}") +else() + # Single architecture install (existing logic) + install( + TARGETS dbcsr + EXPORT DBCSRTargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() # See https://gitlab.kitware.com/cmake/cmake/-/issues/19608 # CMAKE_INSTALL_Fortran_MODULES may not be an "official" variable if (NOT CMAKE_INSTALL_Fortran_MODULES) diff --git a/src/acc/libsmm_acc/CMakeLists.txt b/src/acc/libsmm_acc/CMakeLists.txt index e18143d33f5..126026454e4 100644 --- a/src/acc/libsmm_acc/CMakeLists.txt +++ b/src/acc/libsmm_acc/CMakeLists.txt @@ -7,29 +7,84 @@ set(SMM_ACC_KERNELS kernels/smm_acc_dnt_tiny.h kernels/smm_acc_transpose.h) -add_custom_target( - parameters ALL - COMMAND - ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_parameters.py - --gpu_version=${WITH_GPU} --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters - DEPENDS generate_parameters.py parameters/parameters_${WITH_GPU_PARAMS}.json - BYPRODUCTS parameters.h - COMMENT "libsmm_acc: generating parameters for GPU ${WITH_GPU_PARAMS}") - -add_custom_target( - smm_acc_kernels ALL - COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_kernels.py - ${CMAKE_CURRENT_SOURCE_DIR}/kernels - DEPENDS generate_kernels.py ${SMM_ACC_KERNELS} - BYPRODUCTS smm_acc_kernels.h - COMMENT "libsmm_acc: generating kernels") - -add_dependencies(dbcsr smm_acc_kernels parameters) -target_include_directories(dbcsr PRIVATE ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}) - -# Note: this library is only used in some of the tests, it's just to get include -# paths to generated header files. 
-add_library(libsmm_acc INTERFACE) -target_include_directories(libsmm_acc INTERFACE ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}) +if(MULTI_ARCH AND MULTI_GPU_BUILD) + # Multi-architecture build: create separate targets for each GPU + + # smm_acc_kernels.h is generated from the kernel sources only (no GPU argument): + # generate it once so per-GPU targets never rewrite a header another GPU is compiling against + add_custom_target( + smm_acc_kernels ALL + COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_kernels.py + ${CMAKE_CURRENT_SOURCE_DIR}/kernels + DEPENDS generate_kernels.py ${SMM_ACC_KERNELS} + BYPRODUCTS smm_acc_kernels.h + COMMENT "libsmm_acc: generating kernels") + + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + # Set GPU_PARAMS for this architecture + set(GPU_PARAMS ${GPU_ARCH}) + + # The generator is invoked exactly as in the single-arch branch, which declares + # its output as parameters.h in the working directory; give every GPU its own + # directory so the headers do not overwrite each other + set(GPU_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/${GPU_ARCH}) + file(MAKE_DIRECTORY ${GPU_GEN_DIR}) + + add_custom_target( + parameters_${GPU_ARCH} ALL + COMMAND + ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_parameters.py + --gpu_version=${GPU_ARCH} --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters + WORKING_DIRECTORY ${GPU_GEN_DIR} + DEPENDS generate_parameters.py parameters/parameters_${GPU_PARAMS}.json + BYPRODUCTS ${GPU_GEN_DIR}/parameters.h + COMMENT "libsmm_acc: generating parameters for GPU ${GPU_ARCH}") + + # Create the libsmm_acc target for this architecture + add_custom_target(libsmm_acc_${GPU_ARCH}) + add_dependencies(libsmm_acc_${GPU_ARCH} smm_acc_kernels parameters_${GPU_ARCH}) + + # Create interface library for this architecture + add_library(libsmm_acc_interface_${GPU_ARCH} INTERFACE) + target_include_directories(libsmm_acc_interface_${GPU_ARCH} INTERFACE + ${GPU_GEN_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + + message(STATUS "Created libsmm_acc targets for GPU architecture: ${GPU_ARCH}") + endforeach() + + # Create a convenience target that builds all GPU variants + add_custom_target(libsmm_acc_all_gpus) + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + add_dependencies(libsmm_acc_all_gpus libsmm_acc_${GPU_ARCH}) + endforeach() + + # The generated headers are compiled against by the per-GPU object library + # (dbcsr_arch_<GPU>), so the ordering dependency and the include paths go there; + # the per-GPU directory is searched first so each GPU sees its own parameters.h + foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + if(TARGET dbcsr_arch_${GPU_ARCH}) + 
add_dependencies(dbcsr_arch_${GPU_ARCH} libsmm_acc_${GPU_ARCH}) + target_include_directories(dbcsr_arch_${GPU_ARCH} BEFORE PRIVATE + ${CMAKE_CURRENT_BINARY_DIR}/${GPU_ARCH} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + endforeach() + +else() + # Single architecture build (existing logic) + add_custom_target( + parameters ALL + COMMAND + ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_parameters.py + --gpu_version=${WITH_GPU} --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters + DEPENDS generate_parameters.py parameters/parameters_${WITH_GPU_PARAMS}.json + BYPRODUCTS parameters.h + COMMENT "libsmm_acc: generating parameters for GPU ${WITH_GPU_PARAMS}") + + add_custom_target( + smm_acc_kernels ALL + COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_kernels.py + ${CMAKE_CURRENT_SOURCE_DIR}/kernels + DEPENDS generate_kernels.py ${SMM_ACC_KERNELS} + BYPRODUCTS smm_acc_kernels.h + COMMENT "libsmm_acc: generating kernels") + + add_dependencies(dbcsr smm_acc_kernels parameters) + target_include_directories(dbcsr PRIVATE ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + + # Note: this library is only used in some of the tests, it's just to get include + # paths to generated header files. 
+ add_library(libsmm_acc INTERFACE) + target_include_directories(libsmm_acc INTERFACE ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) +endif() \ No newline at end of file From de9af17f9565bd23e34a02af40d55b73a61abe60 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 18 Oct 2025 17:50:03 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CMakeLists.txt | 61 ++++++----- src/CMakeLists.txt | 176 ++++++++++++++++-------------- src/acc/libsmm_acc/CMakeLists.txt | 61 ++++++----- 3 files changed, 161 insertions(+), 137 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 551cd84ff94..cb95f8cdcb2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,9 +105,12 @@ set_property(CACHE USE_ACCEL PROPERTY STRINGS "" opencl cuda hip) option(MULTI_ARCH "Build for multiple GPU architectures" OFF) # Multi-architecture support (only when enabled) -if(MULTI_ARCH AND MULTI_GPU_BUILD) - set(WITH_GPU_LIST "P100" CACHE STRING "List of GPU architectures to build for (semicolon-separated)") - +if (MULTI_ARCH) + set(WITH_GPU_LIST + "P100" + CACHE STRING + "List of GPU architectures to build for (semicolon-separated)") + # Define supported architectures for the property set(SUPPORTED_CUDA_ARCHITECTURES K20X @@ -118,22 +121,24 @@ if(MULTI_ARCH AND MULTI_GPU_BUILD) A100 H100) set(SUPPORTED_HIP_ARCHITECTURES Mi50 Mi100 Mi250 Mi300) - - set_property(CACHE WITH_GPU_LIST PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} ${SUPPORTED_HIP_ARCHITECTURES}) + + set_property( + CACHE WITH_GPU_LIST PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} + ${SUPPORTED_HIP_ARCHITECTURES}) # Parse the list string(REPLACE ";" " " WITH_GPU_LIST_STR "${WITH_GPU_LIST}") list(LENGTH WITH_GPU_LIST GPU_COUNT) - if(GPU_COUNT GREATER 1) + if (GPU_COUNT GREATER 1) set(MULTI_GPU_BUILD ON) message(STATUS "Multi-GPU build enabled for: 
${WITH_GPU_LIST_STR}") - else() + else () set(MULTI_GPU_BUILD OFF) list(GET WITH_GPU_LIST 0 WITH_GPU) message(STATUS "Single GPU build for: ${WITH_GPU}") - endif() -endif() + endif () +endif () set(SUPPORTED_CUDA_ARCHITECTURES K20X @@ -145,7 +150,7 @@ set(SUPPORTED_CUDA_ARCHITECTURES H100) set(SUPPORTED_HIP_ARCHITECTURES Mi50 Mi100 Mi250 Mi300) -if(NOT MULTI_ARCH) +if (NOT MULTI_ARCH) set(WITH_GPU $,"","P100"> CACHE @@ -154,12 +159,14 @@ if(NOT MULTI_ARCH) ) set(WITH_GPU_PARAMS "${WITH_GPU}") set_property(CACHE WITH_GPU PROPERTY STRINGS ${SUPPORTED_CUDA_ARCHITECTURES} - ${SUPPORTED_HIP_ARCHITECTURES}) -else() - # For multi-arch builds, set WITH_GPU_PARAMS to first architecture for compatibility + ${SUPPORTED_HIP_ARCHITECTURES}) +else () + # For multi-arch builds, set WITH_GPU_PARAMS to first architecture for + # compatibility list(GET WITH_GPU_LIST 0 WITH_GPU_PARAMS) - list(GET WITH_GPU_LIST 0 WITH_GPU) # Set WITH_GPU for compatibility with existing code -endif() + list(GET WITH_GPU_LIST 0 WITH_GPU) # Set WITH_GPU for compatibility with + # existing code +endif () option(WITH_CUDA_PROFILING "Enable profiling within CUDA" OFF) option(WITH_HIP_PROFILING "Enable profiling within HIP" OFF) @@ -338,33 +345,35 @@ if (USE_ACCEL MATCHES "hip") endif () enable_language(HIP) - if(MULTI_ARCH AND MULTI_GPU_BUILD) + if (MULTI_ARCH AND MULTI_GPU_BUILD) # Validate all GPU architectures in the list - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) list(FIND SUPPORTED_HIP_ARCHITECTURES ${GPU_ARCH} GPU_SUPPORTED) if (GPU_SUPPORTED EQUAL -1) message( - FATAL_ERROR "GPU architecture requested (${GPU_ARCH}) is not supported. " - "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") + FATAL_ERROR + "GPU architecture requested (${GPU_ARCH}) is not supported. 
" + "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") endif () - endforeach() + endforeach () set(ACC_ARCH_NUMBERS "") - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) list(APPEND ACC_ARCH_NUMBERS ${GPU_ARCH_NUMBER_${GPU_ARCH}}) - endforeach() + endforeach () message(STATUS "Multi-GPU HIP build for architectures: ${WITH_GPU_LIST}") message(STATUS "HIP architecture numbers: ${ACC_ARCH_NUMBERS}") message(STATUS "Kernel parameters will be generated for: ${WITH_GPU_LIST}") - else() + else () # Make sure the GPU required is supported list(FIND SUPPORTED_HIP_ARCHITECTURES ${WITH_GPU} GPU_SUPPORTED) if (GPU_SUPPORTED EQUAL -1) message( - FATAL_ERROR "GPU architecture requested (${WITH_GPU}) is not supported. " - "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") + FATAL_ERROR + "GPU architecture requested (${WITH_GPU}) is not supported. " + "Please choose from: ${SUPPORTED_HIP_ARCHITECTURES}") endif () set(ACC_ARCH_NUMBER ${GPU_ARCH_NUMBER_${WITH_GPU}}) @@ -372,7 +381,7 @@ if (USE_ACCEL MATCHES "hip") message(STATUS "Kernel parameters: " ${WITH_GPU_PARAMS}) message(STATUS "GPU architecture number: " ${ACC_ARCH_NUMBER}) message(STATUS "GPU profiling enabled: " ${WITH_HIP_PROFILING}) - endif() + endif () # ROCm is typically installed in /opt/rocm; otherwise let the user set # ROCM_PATH as an environment variable or define. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b1f509f2b59..67e8be86583 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -116,16 +116,15 @@ set(DBCSR_HIP_AND_CUDA_COMMON_SRCS acc/cuda_hip/acc_stream.cpp) set(DBCSR_ARCH_DEPENDENT_SRCS - acc/cuda_hip/calculate_norms.cpp - acc/libsmm_acc/libsmm_acc_benchmark.cpp - acc/libsmm_acc/libsmm_acc_init.cpp - acc/libsmm_acc/libsmm_acc.cpp) - + acc/cuda_hip/calculate_norms.cpp acc/libsmm_acc/libsmm_acc_benchmark.cpp + acc/libsmm_acc/libsmm_acc_init.cpp acc/libsmm_acc/libsmm_acc.cpp) -set(DBCSR_CUDA_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} ${DBCSR_ARCH_DEPENDENT_SRCS} acc/cuda/acc_cuda.cpp - acc/cuda/dbcsr_cuda_nvtx_cu.cpp) +set(DBCSR_CUDA_SRCS + ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} ${DBCSR_ARCH_DEPENDENT_SRCS} + acc/cuda/acc_cuda.cpp acc/cuda/dbcsr_cuda_nvtx_cu.cpp) -set(DBCSR_HIP_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} ${DBCSR_ARCH_DEPENDENT_SRCS} acc/hip/acc_hip.cpp) +set(DBCSR_HIP_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} + ${DBCSR_ARCH_DEPENDENT_SRCS} acc/hip/acc_hip.cpp) if (USE_ACCEL MATCHES "hip") set_source_files_properties(acc/cuda_hip/calculate_norms.cpp @@ -171,81 +170,81 @@ endif () # ================================================================================================= # DBCSR LIBRARY -if(MULTI_ARCH AND MULTI_GPU_BUILD) +if (MULTI_ARCH AND MULTI_GPU_BUILD) # Multi-architecture build: create separate libraries per GPU architecture - + # First, create a common library with all architecture-independent code set(DBCSR_COMMON_SRCS ${DBCSR_FORTRAN_SRCS}) if (USE_ACCEL MATCHES "cuda") - # Add only common CUDA sources (manually specify to exclude arch-specific ones) - list(APPEND DBCSR_COMMON_SRCS - ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} - acc/cuda/acc_cuda.cpp - acc/cuda/dbcsr_cuda_nvtx_cu.cpp) + # Add only common CUDA sources (manually specify to exclude arch-specific + # ones) + list(APPEND DBCSR_COMMON_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} + acc/cuda/acc_cuda.cpp acc/cuda/dbcsr_cuda_nvtx_cu.cpp) # 
Note: calculate_norms.cpp and libsmm_acc files are intentionally excluded elseif (USE_ACCEL MATCHES "hip") - # Add only common HIP sources (manually specify to exclude arch-specific ones) - list(APPEND DBCSR_COMMON_SRCS - ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} - acc/hip/acc_hip.cpp) + # Add only common HIP sources (manually specify to exclude arch-specific + # ones) + list(APPEND DBCSR_COMMON_SRCS ${DBCSR_HIP_AND_CUDA_COMMON_SRCS} + acc/hip/acc_hip.cpp) # Note: calculate_norms.cpp and libsmm_acc files are intentionally excluded elseif (USE_ACCEL MATCHES "opencl") list(APPEND DBCSR_COMMON_SRCS ${DBCSR_OPENCL_SRCS}) - endif() + endif () # Create common object library (compiled once, reused for all architectures) add_library(dbcsr_common OBJECT ${DBCSR_COMMON_SRCS}) - + # Set common properties - set_target_properties( - dbcsr_common - PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(dbcsr_common PROPERTIES POSITION_INDEPENDENT_CODE ON) # Now create separate libraries for each GPU architecture - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) # Get architecture number for this GPU set(ARCH_NUM ${GPU_ARCH_NUMBER_${GPU_ARCH}}) - + # Create architecture-specific object library for calculate_norms.cpp add_library(dbcsr_arch_${GPU_ARCH} OBJECT) - + if (USE_ACCEL MATCHES "hip|cuda") - target_sources(dbcsr_arch_${GPU_ARCH} PRIVATE - ${DBCSR_ARCH_DEPENDENT_SRCS}) - - target_include_directories(dbcsr_arch_${GPU_ARCH} PRIVATE - ${CMAKE_CURRENT_BINARY_DIR}/acc/libsmm_acc - ${CMAKE_CURRENT_SOURCE_DIR}/acc/libsmm_acc) - + target_sources(dbcsr_arch_${GPU_ARCH} + PRIVATE ${DBCSR_ARCH_DEPENDENT_SRCS}) + + target_include_directories( + dbcsr_arch_${GPU_ARCH} + PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/acc/libsmm_acc + ${CMAKE_CURRENT_SOURCE_DIR}/acc/libsmm_acc) + if (USE_ACCEL MATCHES "hip") set_source_files_properties(acc/cuda_hip/calculate_norms.cpp PROPERTIES LANGUAGE HIP) set_source_files_properties(acc/cuda_hip/calculate_norms.cpp 
PROPERTIES COMPILE_FLAGS "-fPIE") - set_target_properties(dbcsr_arch_${GPU_ARCH} PROPERTIES HIP_ARCHITECTURES "${ARCH_NUM}") + set_target_properties(dbcsr_arch_${GPU_ARCH} + PROPERTIES HIP_ARCHITECTURES "${ARCH_NUM}") elseif (USE_ACCEL MATCHES "cuda") set_source_files_properties(acc/cuda_hip/calculate_norms.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(acc/cuda_hip/calculate_norms.cpp PROPERTIES COMPILE_FLAGS "--x cu") - set_target_properties(dbcsr_arch_${GPU_ARCH} PROPERTIES CUDA_ARCHITECTURES "${ARCH_NUM}") - endif() - + set_target_properties(dbcsr_arch_${GPU_ARCH} + PROPERTIES CUDA_ARCHITECTURES "${ARCH_NUM}") + endif () + # Set architecture-specific compile definitions - target_compile_definitions(dbcsr_arch_${GPU_ARCH} PRIVATE - __DBCSR_ACC - $<$:__CUDA> - $<$:__HIP> - ARCH_NUMBER=${ARCH_NUM} - $<$:__CUDA_PROFILING> - $<$:__HIP_PROFILING>) - endif() - + target_compile_definitions( + dbcsr_arch_${GPU_ARCH} + PRIVATE __DBCSR_ACC + $<$:__CUDA> + $<$:__HIP> + ARCH_NUMBER=${ARCH_NUM} + $<$:__CUDA_PROFILING> + $<$:__HIP_PROFILING>) + endif () + # Create the final library combining common + arch-specific code - add_library(dbcsr_${GPU_ARCH} - $ - $) - + add_library(dbcsr_${GPU_ARCH} $ + $) + # Set library properties set_target_properties( dbcsr_${GPU_ARCH} @@ -256,25 +255,29 @@ if(MULTI_ARCH AND MULTI_GPU_BUILD) EXPORT_NAME "dbcsr_${GPU_ARCH}") set_target_properties(dbcsr_${GPU_ARCH} PROPERTIES LINKER_LANGUAGE Fortran) - - message(STATUS "Created library target: dbcsr_${GPU_ARCH} for architecture ${GPU_ARCH} (${ARCH_NUM})") - endforeach() + + message( + STATUS + "Created library target: dbcsr_${GPU_ARCH} for architecture ${GPU_ARCH} (${ARCH_NUM})" + ) + endforeach () # Create a convenience target that builds all GPU variants add_custom_target(dbcsr_all_gpus) - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) add_dependencies(dbcsr_all_gpus dbcsr_${GPU_ARCH}) - endforeach() - - # Set the first GPU architecture as the 
default "dbcsr" target for compatibility + endforeach () + + # Set the first GPU architecture as the default "dbcsr" target for + # compatibility list(GET WITH_GPU_LIST 0 FIRST_GPU_ARCH) add_library(dbcsr ALIAS dbcsr_${FIRST_GPU_ARCH}) message(STATUS "Default 'dbcsr' target aliased to: dbcsr_${FIRST_GPU_ARCH}") -else() +else () # Single architecture build (existing logic) add_library(dbcsr ${DBCSR_SRCS}) - + # Set properties for single arch build set_target_properties( dbcsr @@ -283,19 +286,21 @@ else() POSITION_INDEPENDENT_CODE ON) if (USE_ACCEL MATCHES "hip") - set_target_properties(dbcsr PROPERTIES HIP_ARCHITECTURES "${ACC_ARCH_NUMBER}") + set_target_properties(dbcsr PROPERTIES HIP_ARCHITECTURES + "${ACC_ARCH_NUMBER}") elseif (USE_ACCEL MATCHES "cuda") - set_target_properties(dbcsr PROPERTIES CUDA_ARCHITECTURES "${ACC_ARCH_NUMBER}") - endif() - + set_target_properties(dbcsr PROPERTIES CUDA_ARCHITECTURES + "${ACC_ARCH_NUMBER}") + endif () + set_target_properties(dbcsr PROPERTIES LINKER_LANGUAGE Fortran) -endif() +endif () # ================================================================================================= # APPLY COMMON SETTINGS TO ALL TARGETS # Function to apply common settings to a target -function(apply_common_dbcsr_settings target_name) +function (apply_common_dbcsr_settings target_name) # Apply all the existing settings... 
if (USE_SMM MATCHES "libxsmm" OR (USE_SMM MATCHES "auto" AND LIBXSMM_FOUND)) target_compile_definitions(${target_name} PRIVATE __LIBXSMM) @@ -319,7 +324,8 @@ function(apply_common_dbcsr_settings target_name) endif () target_compile_definitions(${target_name} PRIVATE $<$:NDEBUG>) - target_link_libraries(${target_name} PRIVATE ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + target_link_libraries(${target_name} PRIVATE ${BLAS_LIBRARIES} + ${LAPACK_LIBRARIES}) target_include_directories(${target_name} PRIVATE base) target_include_directories( ${target_name} @@ -343,8 +349,9 @@ function(apply_common_dbcsr_settings target_name) $<$:OpenMP::OpenMP_Fortran>) if (USE_ACCEL) - # For multi-arch, we already set ARCH_NUMBER per target, for single-arch use existing logic - if(NOT MULTI_ARCH) + # For multi-arch, we already set ARCH_NUMBER per target, for single-arch use + # existing logic + if (NOT MULTI_ARCH) target_compile_definitions( ${target_name} PRIVATE __DBCSR_ACC @@ -355,7 +362,7 @@ function(apply_common_dbcsr_settings target_name) $<$:ARCH_NUMBER=${ACC_ARCH_NUMBER}> $<$:__CUDA_PROFILING> $<$:__HIP_PROFILING>) - else() + else () # For multi-arch common target, set basic acceleration flags target_compile_definitions( ${target_name} @@ -365,7 +372,7 @@ function(apply_common_dbcsr_settings target_name) $<$:__HIP> $<$:__CUDA_PROFILING> $<$:__HIP_PROFILING>) - endif() + endif () target_link_libraries( ${target_name} @@ -381,20 +388,20 @@ function(apply_common_dbcsr_settings target_name) $<$:roctracer64> $<$:OpenCL::OpenCL>) endif () -endfunction() +endfunction () # Apply settings to all targets -if(MULTI_ARCH AND MULTI_GPU_BUILD) +if (MULTI_ARCH AND MULTI_GPU_BUILD) # Apply to common target apply_common_dbcsr_settings(dbcsr_common) # Apply to all GPU-specific targets - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) apply_common_dbcsr_settings(dbcsr_${GPU_ARCH}) - endforeach() -else() + endforeach () +else () # Apply to single target 
apply_common_dbcsr_settings(dbcsr) -endif() +endif () # ================================================================================================= # DBCSR's C API @@ -431,24 +438,27 @@ set(config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") set(config_namespace "DBCSR::") # Install targets -if(MULTI_ARCH AND MULTI_GPU_BUILD) +if (MULTI_ARCH AND MULTI_GPU_BUILD) # Install all GPU-specific libraries - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) install( TARGETS dbcsr_${GPU_ARCH} EXPORT DBCSRTargets LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") - endforeach() - message(STATUS "Multi-arch install: Installing libraries for all GPU architectures: ${WITH_GPU_LIST}") -else() + endforeach () + message( + STATUS + "Multi-arch install: Installing libraries for all GPU architectures: ${WITH_GPU_LIST}" + ) +else () # Single architecture install (existing logic) install( TARGETS dbcsr EXPORT DBCSRTargets LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") -endif() +endif () # See https://gitlab.kitware.com/cmake/cmake/-/issues/19608 # CMAKE_INSTALL_Fortran_MODULES may not be an "official" variable if (NOT CMAKE_INSTALL_Fortran_MODULES) diff --git a/src/acc/libsmm_acc/CMakeLists.txt b/src/acc/libsmm_acc/CMakeLists.txt index 126026454e4..a06757d8ec0 100644 --- a/src/acc/libsmm_acc/CMakeLists.txt +++ b/src/acc/libsmm_acc/CMakeLists.txt @@ -7,65 +7,70 @@ set(SMM_ACC_KERNELS kernels/smm_acc_dnt_tiny.h kernels/smm_acc_transpose.h) -if(MULTI_ARCH AND MULTI_GPU_BUILD) +if (MULTI_ARCH AND MULTI_GPU_BUILD) # Multi-architecture build: create separate targets for each GPU - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) # Set GPU_PARAMS for this architecture set(GPU_PARAMS ${GPU_ARCH}) - + add_custom_target( parameters_${GPU_ARCH} ALL COMMAND ${Python_EXECUTABLE} 
${CMAKE_CURRENT_SOURCE_DIR}/generate_parameters.py - --gpu_version=${GPU_ARCH} --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters + --gpu_version=${GPU_ARCH} + --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters DEPENDS generate_parameters.py parameters/parameters_${GPU_PARAMS}.json BYPRODUCTS parameters_${GPU_ARCH}.h COMMENT "libsmm_acc: generating parameters for GPU ${GPU_ARCH}") add_custom_target( smm_acc_kernels_${GPU_ARCH} ALL - COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_kernels.py - ${CMAKE_CURRENT_SOURCE_DIR}/kernels + COMMAND + ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_kernels.py + ${CMAKE_CURRENT_SOURCE_DIR}/kernels DEPENDS generate_kernels.py ${SMM_ACC_KERNELS} BYPRODUCTS smm_acc_kernels_${GPU_ARCH}.h COMMENT "libsmm_acc: generating kernels for GPU ${GPU_ARCH}") # Create the libsmm_acc target for this architecture add_custom_target(libsmm_acc_${GPU_ARCH}) - add_dependencies(libsmm_acc_${GPU_ARCH} smm_acc_kernels_${GPU_ARCH} parameters_${GPU_ARCH}) + add_dependencies(libsmm_acc_${GPU_ARCH} smm_acc_kernels_${GPU_ARCH} + parameters_${GPU_ARCH}) # Create interface library for this architecture add_library(libsmm_acc_interface_${GPU_ARCH} INTERFACE) - target_include_directories(libsmm_acc_interface_${GPU_ARCH} INTERFACE - ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}) - - message(STATUS "Created libsmm_acc targets for GPU architecture: ${GPU_ARCH}") - endforeach() + target_include_directories( + libsmm_acc_interface_${GPU_ARCH} INTERFACE ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + + message( + STATUS "Created libsmm_acc targets for GPU architecture: ${GPU_ARCH}") + endforeach () # Create a convenience target that builds all GPU variants add_custom_target(libsmm_acc_all_gpus) - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) add_dependencies(libsmm_acc_all_gpus libsmm_acc_${GPU_ARCH}) - endforeach() - + endforeach () + # For backward compatibility, make dbcsr 
depend on all libsmm_acc targets - foreach(GPU_ARCH IN LISTS WITH_GPU_LIST) - if(TARGET dbcsr_${GPU_ARCH}) + foreach (GPU_ARCH IN LISTS WITH_GPU_LIST) + if (TARGET dbcsr_${GPU_ARCH}) add_dependencies(dbcsr_${GPU_ARCH} libsmm_acc_${GPU_ARCH}) - target_include_directories(dbcsr_${GPU_ARCH} PRIVATE - ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - endforeach() + target_include_directories( + dbcsr_${GPU_ARCH} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + endif () + endforeach () -else() +else () # Single architecture build (existing logic) add_custom_target( parameters ALL COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_parameters.py - --gpu_version=${WITH_GPU} --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters + --gpu_version=${WITH_GPU} + --base_dir=${CMAKE_CURRENT_SOURCE_DIR}/parameters DEPENDS generate_parameters.py parameters/parameters_${WITH_GPU_PARAMS}.json BYPRODUCTS parameters.h COMMENT "libsmm_acc: generating parameters for GPU ${WITH_GPU_PARAMS}") @@ -82,9 +87,9 @@ else() target_include_directories(dbcsr PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) - # Note: this library is only used in some of the tests, it's just to get include - # paths to generated header files. + # Note: this library is only used in some of the tests, it's just to get + # include paths to generated header files. add_library(libsmm_acc INTERFACE) target_include_directories(libsmm_acc INTERFACE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) -endif() \ No newline at end of file +endif ()