From 570a4b05aef2f95471b46ed56267d7c674ffcef2 Mon Sep 17 00:00:00 2001 From: Jeremy Martinon Date: Fri, 16 May 2025 16:32:05 -0400 Subject: [PATCH] CMake: support Apple ARM + GCC 14; fix typo ARM micro kernel --- CMakeLists.txt | 44 +++++++++++++++++++++++++++++++++++++------- src/transpose.cpp | 2 +- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 582ada3..f489c48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ -cmake_minimum_required(VERSION 3.7 FATAL_ERROR) +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) project (HPTT C CXX) set(CMAKE_CXX_STANDARD 11) @@ -16,6 +16,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if(ENABLE_IBM) set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -fopenmp) + elseif(APPLE) + set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -fopenmp -mtune=native) else() set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -fopenmp -march=native -mtune=native) endif() @@ -35,18 +37,46 @@ elseif(ENABLE_IBM) set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -mtune=native -DHPTT_ARCH_IBM -maltivec -mabi=altivec) endif() +if(APPLE) + if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + message(STATUS "Running on Apple Silicon (M-series)") + set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -O3 -ffast-math -funroll-loops -ftree-vectorize + -mcpu=native -DHPTT_ARCH_ARM) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + message(STATUS "Running on Intel-based macOS") + set(HPTT_CXX_FLAGS ${HPTT_CXX_FLAGS} -O3 -ffast-math -funroll-loops -ftree-vectorize -mavx -DHPTT_ARCH_AVX) + endif() +endif() + set(HPTT_SRCS src/hptt.cpp src/plan.cpp src/transpose.cpp src/utils.cpp) -add_library(hptt STATIC ${HPTT_SRCS}) -target_compile_features(hptt PUBLIC cxx_std_11) -target_include_directories(hptt PUBLIC ${PROJECT_SOURCE_DIR}/include) -#target_compile_definitions(hptt PRIVATE ${HPTT_CXX_COMPILE_DEFS}) -target_compile_options(hptt PUBLIC ${HPTT_CXX_FLAGS}) +add_library(hptt_static STATIC ${HPTT_SRCS}) +target_compile_features(hptt_static PUBLIC cxx_std_11) +set_target_properties(hptt_static PROPERTIES OUTPUT_NAME hptt) +target_include_directories(hptt_static PUBLIC ${PROJECT_SOURCE_DIR}/include) +target_compile_options(hptt_static PUBLIC ${HPTT_CXX_FLAGS}) + +add_library(hptt_dyn SHARED ${HPTT_SRCS}) +target_compile_features(hptt_dyn PUBLIC cxx_std_11) +set_target_properties(hptt_dyn PROPERTIES OUTPUT_NAME hptt) +target_include_directories(hptt_dyn PUBLIC ${PROJECT_SOURCE_DIR}/include) +target_compile_options(hptt_dyn PUBLIC ${HPTT_CXX_FLAGS}) + + +find_package(OpenMP REQUIRED) +target_link_libraries(hptt_static PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(hptt_dyn PUBLIC OpenMP::OpenMP_CXX) -install(TARGETS hptt + +install(TARGETS hptt_static LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS hptt_dyn + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) + + set(HPTT_INCLUDES include/compute_node.h include/hptt_types.h diff --git a/src/transpose.cpp b/src/transpose.cpp index f77cd5b..a5de997 100644 --- a/src/transpose.cpp +++ b/src/transpose.cpp @@ -244,7 +244,7 @@ static INLINE void prefetch(const floatType* A, const int lda) { } #include template -struct micro_kernel +struct micro_kernel { static void execute(const float* __restrict__ A, const size_t lda, float* __restrict__ B, const size_t ldb, const float alpha ,const float beta) {