summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jared Van Bortel <jared@nomic.ai> 2024-01-26 15:34:06 -0500
committer GitHub <noreply@github.com> 2024-01-26 15:34:06 -0500
commit bbe7c56c9993af86aa2d84cbe1fd69e1b4300cea (patch)
tree c86fcbd3a0aeaa7596cfdc49c7857d8ac5ac9244
parent 62fead3ea0a30c8d424f4a8373fa14165c7c707f (diff)
cmake : pass CPU architecture flags to nvcc (#5146)
-rw-r--r-- CMakeLists.txt 74
1 files changed, 39 insertions, 35 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index af366512..2b2ae532 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -466,17 +466,17 @@ function(get_flags CCID CCVER)
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
- set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
+ list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
- set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
+ list(APPEND CXX_FLAGS -Wno-format-truncation)
endif()
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
- set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
+ list(APPEND CXX_FLAGS -Wextra-semi)
endif()
elseif (CCID MATCHES "Intel")
# enable max optimization level when using Intel compiler
@@ -510,16 +510,18 @@ if (LLAMA_ALL_WARNINGS)
endif()
endif()
+set(CUDA_CXX_FLAGS "")
+
if (LLAMA_CUBLAS)
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
if (NOT MSVC)
- set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
+ list(APPEND CUDA_FLAGS -Wno-pedantic)
endif()
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
- set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
+ list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
endif()
execute_process(
@@ -547,13 +549,8 @@ if (LLAMA_CUBLAS)
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
get_flags(${CUDA_CCID} ${CUDA_CCVER})
- list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
- if (NOT CUDA_CXX_FLAGS STREQUAL "")
- set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
- endif()
+ list(APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
endif()
-
- add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()
if (WIN32)
@@ -618,12 +615,7 @@ if (NOT MSVC)
endif()
endif()
-function(add_compile_option_cpp ARG)
- # Adds a compile option to C/C++ only, but not for Cuda.
- # Use, e.g., for CPU-architecture flags.
- add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
- add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
-endfunction()
+set(ARCH_FLAGS "")
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
message(STATUS "ARM detected")
@@ -636,19 +628,19 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
else()
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
- add_compile_options(-mfp16-format=ieee)
+ list(APPEND ARCH_FLAGS -mfp16-format=ieee)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero
- add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access)
+ list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
# Raspberry Pi 2
- add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+ list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
# Raspberry Pi 3, 4, Zero 2 (32-bit)
- add_compile_options(-mno-unaligned-access)
+ list(APPEND ARCH_FLAGS -mno-unaligned-access)
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
@@ -659,7 +651,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
include(cmake/FindSIMD.cmake)
endif ()
if (LLAMA_AVX512)
- add_compile_option_cpp(/arch:AVX512)
+ list(APPEND ARCH_FLAGS /arch:AVX512)
# MSVC has no compile-time flags enabling specific
# AVX512 extensions, neither it defines the
# macros corresponding to the extensions.
@@ -673,49 +665,61 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
endif()
elseif (LLAMA_AVX2)
- add_compile_option_cpp(/arch:AVX2)
+ list(APPEND ARCH_FLAGS /arch:AVX2)
elseif (LLAMA_AVX)
- add_compile_option_cpp(/arch:AVX)
+ list(APPEND ARCH_FLAGS /arch:AVX)
endif()
else()
if (LLAMA_NATIVE)
- add_compile_option_cpp(-march=native)
+ list(APPEND ARCH_FLAGS -march=native)
endif()
if (LLAMA_F16C)
- add_compile_option_cpp(-mf16c)
+ list(APPEND ARCH_FLAGS -mf16c)
endif()
if (LLAMA_FMA)
- add_compile_option_cpp(-mfma)
+ list(APPEND ARCH_FLAGS -mfma)
endif()
if (LLAMA_AVX)
- add_compile_option_cpp(-mavx)
+ list(APPEND ARCH_FLAGS -mavx)
endif()
if (LLAMA_AVX2)
- add_compile_option_cpp(-mavx2)
+ list(APPEND ARCH_FLAGS -mavx2)
endif()
if (LLAMA_AVX512)
- add_compile_option_cpp(-mavx512f)
- add_compile_option_cpp(-mavx512bw)
+ list(APPEND ARCH_FLAGS -mavx512f)
+ list(APPEND ARCH_FLAGS -mavx512bw)
endif()
if (LLAMA_AVX512_VBMI)
- add_compile_option_cpp(-mavx512vbmi)
+ list(APPEND ARCH_FLAGS -mavx512vbmi)
endif()
if (LLAMA_AVX512_VNNI)
- add_compile_option_cpp(-mavx512vnni)
+ list(APPEND ARCH_FLAGS -mavx512vnni)
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
message(STATUS "PowerPC detected")
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
- add_compile_options(-mcpu=powerpc64le)
+ list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
else()
- add_compile_options(-mcpu=native -mtune=native)
+ list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
endif()
else()
message(STATUS "Unknown architecture")
endif()
+add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
+add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
+
+if (LLAMA_CUBLAS)
+ list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
+ list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
+ if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
+ list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
+ endif()
+ add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
+endif()
+
if (MINGW)
# Target Windows 8 for PrefetchVirtualMemory
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})