Init cmake commit

Signed-off-by: cyy <cyyever@outlook.com>
This commit is contained in:
cyy 2022-04-05 13:15:48 +08:00
parent 593de54e52
commit b1c424d1a6
5 changed files with 154 additions and 5 deletions

24
CMakeLists.txt Normal file
View File

@ -0,0 +1,24 @@
cmake_minimum_required(VERSION 4.0)
project(
  nccl
  VERSION 2.27.7
  LANGUAGES CUDA CXX)

# Emit compile_commands.json for editors and static-analysis tools.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# User-facing build switches. They are read by nccl_add_target_options() in
# cmake/common.cmake, which turns them into per-target compiler arguments.
# Every switch defaults to OFF except NVTX.
option(NVTX "NVTX" ON)
option(VERBOSE "VERBOSE" OFF)
option(KEEP "KEEP" OFF)
option(TRACE "TRACE" OFF)
option(PROFAPI "PROFAPI" OFF)
option(NET_PROFILER "NET_PROFILER" OFF)

# The library targets (nccl, nccl_static) are defined in src/.
add_subdirectory(src)

# Install both libraries together with the headers registered in their
# `public_headers` file set, and record them in the NCCLConfig export set.
install(
  TARGETS nccl nccl_static
  EXPORT NCCLConfig
  FILE_SET public_headers
  DESTINATION include)

# Install the export set so that consumers can import the targets under the
# NCCL:: namespace.
install(
  EXPORT NCCLConfig
  DESTINATION lib/cmake/nccl
  NAMESPACE NCCL::)

39
cmake/common.cmake Normal file
View File

@ -0,0 +1,39 @@
# nccl_add_target_options(<target>)
#
# Applies the project's common build settings to <target>:
#   * C++17 / CUDA 17, hidden symbol visibility, position-independent code and
#     device-symbol resolution, all expressed as target properties;
#   * optimisation / debug-info flags selected per configuration;
#   * warning flags for host code and the nvcc-specific flags for CUDA sources;
#   * extra flags and preprocessor definitions driven by the VERBOSE, TRACE,
#     NVTX, KEEP, PROFAPI and NET_PROFILER switches.
#
# Arguments:
#   target - name of an existing target; every setting is applied PRIVATE.
function(nccl_add_target_options target)
  # POSITION_INDEPENDENT_CODE replaces the hand-written -fPIC: CMake then emits
  # the flag in the form each language's compiler understands.
  set_target_properties(
    ${target}
    PROPERTIES CXX_STANDARD 17
               CUDA_STANDARD 17
               CXX_VISIBILITY_PRESET hidden
               VISIBILITY_INLINES_HIDDEN 1
               CUDA_RESOLVE_DEVICE_SYMBOLS ON
               POSITION_INDEPENDENT_CODE ON)

  # Generator expressions are quoted so that they reach CMake as one argument
  # each. Flag/value pairs use the SHELL: prefix; without it CMake's option
  # de-duplication would drop a repeated -Xptxas and orphan its value.
  target_compile_options(
    ${target}
    PRIVATE
      "$<$<CONFIG:Debug>:-ggdb3>"
      "$<$<NOT:$<CONFIG:Debug>>:-O3>"
      # Host warning flags go straight to the host compiler ...
      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wall;-Wno-unused-function;-Wno-sign-compare;-Wvla>"
      # ... and are forwarded through -Xcompiler for CUDA sources.
      "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wall,-Wno-unused-function,-Wno-sign-compare,-Wvla>"
      "$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>"
      "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xptxas -maxrregcount=96>"
      "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xfatbin -compress-all>")

  if(VERBOSE)
    target_compile_options(
      ${target}
      PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xptxas -v>"
              "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wall,-Wextra>"
              "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wall;-Wextra>")
  endif()

  if(KEEP)
    target_compile_options(${target}
                           PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-keep>")
  endif()

  # The names below are preprocessor symbols, so they must be added as compile
  # definitions (-D...); passed as compile options they would reach the compiler
  # as bare arguments.
  if(TRACE)
    target_compile_definitions(${target} PRIVATE ENABLE_TRACE)
  endif()
  if(NOT NVTX)
    target_compile_definitions(${target} PRIVATE NVTX_DISABLE)
  endif()
  if(PROFAPI)
    target_compile_definitions(${target} PRIVATE PROFAPI)
  endif()
  if(NET_PROFILER)
    target_compile_definitions(${target} PRIVATE NET_PROFILER)
  endif()
endfunction()

51
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,51 @@
# Build rules for the NCCL host library: the shared `nccl` target and the
# static `nccl_static` target, both compiled from the same source list.
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/common.cmake")
find_package(CUDAToolkit REQUIRED)

# Variables substituted into nccl.h.in by configure_file() below.
set(nccl_Major ${nccl_VERSION_MAJOR})
set(nccl_Minor ${nccl_VERSION_MINOR})
set(nccl_Patch ${nccl_VERSION_PATCH})
# NCCL_VERSION(X,Y,Z) ((X) * 10000 + (Y) * 100 + (Z))
math(
  EXPR
  nccl_Version
  "${nccl_VERSION_MAJOR} * 10000 + ${nccl_VERSION_MINOR} * 100 + ${nccl_VERSION_PATCH}"
)
# No version suffix: leaving the variable unset makes it expand to an empty
# string in the template.
set(nccl_Suffix)

# NOTE(review): the generated header is written into the source tree; moving it
# to the binary directory would also require updating every include path that
# expects it under src/include.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nccl.h.in
               ${CMAKE_CURRENT_SOURCE_DIR}/include/nccl.h)

# CONFIGURE_DEPENDS re-runs the glob at build time so that added or removed
# sources are noticed without a manual re-configure.
file(
  GLOB
  SRC_FILES
  CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/*.cc"
  "${CMAKE_CURRENT_SOURCE_DIR}/misc/*.cc"
  "${CMAKE_CURRENT_SOURCE_DIR}/transport/*.cc"
  "${CMAKE_CURRENT_SOURCE_DIR}/collectives/*.cc"
  "${CMAKE_CURRENT_SOURCE_DIR}/graph/*.cc")

set(HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/include/nccl.h")
set(NCCL_LIBS nccl;nccl_static)
add_library(nccl SHARED ${SRC_FILES})
add_library(nccl_static STATIC ${SRC_FILES})

foreach(lib_name IN LISTS NCCL_LIBS)
  nccl_add_target_options(${lib_name})
  target_include_directories(
    ${lib_name}
    PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include"
            "${CMAKE_CURRENT_SOURCE_DIR}/include/plugin"
            "${CUDAToolkit_INCLUDE_DIRS}")
  # Headers in a file set are installed with their path relative to BASE_DIRS
  # preserved. The base directory therefore has to be include/ itself so that
  # nccl.h lands directly in the install destination (and not in a nested
  # include/include/ directory).
  target_sources(
    ${lib_name}
    PUBLIC FILE_SET
           public_headers
           TYPE
           HEADERS
           BASE_DIRS
           "${CMAKE_CURRENT_SOURCE_DIR}/include"
           FILES
           ${HEADER_FILES})
endforeach()

35
src/device/CMakeLists.txt Normal file
View File

@ -0,0 +1,35 @@
# Object library holding the device code. Besides the two fixed sources, one
# translation unit is generated for every (collective, reduction op, data type)
# combination.
set(CU_FILES onerank_reduce.cu functions.cu)
add_library(colldevice OBJECT ${CU_FILES})

# The position of an entry in these lists is what gets passed to the sources as
# NCCL_TYPE / NCCL_OP, so the order is significant.
# NOTE(review): presumably the order has to match the corresponding enums in
# nccl.h - confirm before reordering.
set(datatypes "i8;u8;i32;u32;i64;u64;f16;f32;f64")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11")
  list(APPEND datatypes bf16)
endif()
set(ops "sum;prod;min;max;premulsum;sumpostdiv")

# foreach(RANGE n) iterates 0..n inclusive, hence the "- 1" on both lengths.
list(LENGTH ops op_num)
math(EXPR op_num "${op_num} - 1")
list(LENGTH datatypes datatype_num)
math(EXPR datatype_num "${datatype_num} - 1")

set(base_files "sendrecv;all_reduce;all_gather;broadcast;reduce;reduce_scatter")
foreach(base IN LISTS base_files)
  foreach(opn RANGE ${op_num})
    list(GET ops ${opn} op)
    foreach(dtn RANGE ${datatype_num})
      list(GET datatypes ${dtn} dt)
      # Each combination gets its own uniquely named copy of <base>.cu in the
      # build tree, compiled with the op / type indices as definitions.
      set(new_file ${CMAKE_CURRENT_BINARY_DIR}/${base}_${op}_${dt}.cu)
      configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${base}.cu ${new_file}
                     COPYONLY)
      set_property(SOURCE ${new_file} PROPERTY COMPILE_DEFINITIONS
                                               NCCL_OP=${opn} NCCL_TYPE=${dtn})
      target_sources(colldevice PRIVATE ${new_file})
    endforeach()
  endforeach()
endforeach()

# The generated copies live in the build tree, so this directory is added for
# headers that sit next to the original .cu files. The project headers are in
# src/include, i.e. one level above this directory (src/device).
target_include_directories(
  colldevice PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../include
                     ${CMAKE_CURRENT_SOURCE_DIR})
# Compiled kernels and collectives with relocatable device code ...
set_property(TARGET colldevice PROPERTY CUDA_SEPARABLE_COMPILATION ON)

View File

@ -16,12 +16,12 @@
#include <cuda_fp8.h>
#endif
#define NCCL_MAJOR ${nccl:Major}
#define NCCL_MINOR ${nccl:Minor}
#define NCCL_PATCH ${nccl:Patch}
#define NCCL_SUFFIX "${nccl:Suffix}"
#define NCCL_MAJOR ${nccl_Major}
#define NCCL_MINOR ${nccl_Minor}
#define NCCL_PATCH ${nccl_Patch}
#define NCCL_SUFFIX "${nccl_Suffix}"
#define NCCL_VERSION_CODE ${nccl:Version}
#define NCCL_VERSION_CODE ${nccl_Version}
#define NCCL_VERSION(X,Y,Z) (((X) <= 2 && (Y) <= 8) ? (X) * 1000 + (Y) * 100 + (Z) : (X) * 10000 + (Y) * 100 + (Z))
#ifdef __cplusplus