# Poor man's include guard: this module defines torch::cudart below, so if
# that target already exists we have been included before and can bail out.
# NOTE(review): the guard body (return()/endif()) was lost during extraction
# and has been restored — verify against upstream.
if(TARGET torch::cudart)
  return()
endif()

# sccache is only supported in CMake master and not in the newest official
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)

# We don't want to statically link cudart, because we rely on its dynamic linkage in
# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
# Technically, we can link cudart here statically, and link libtorch_python.so
# to a dynamic libcudart.so, but that's just wasteful.
# However, on Windows, if this one gets switched off, the error "cuda: unknown error"
# will be raised when running the following code:
# >>> torch.cuda.is_available()
# >>> torch.cuda.current_device()
# More details can be found in the following links.
# https://github.com/pytorch/pytorch/issues/20635
# https://github.com/pytorch/pytorch/issues/17108
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
# Locate CUDA via the legacy FindCUDA module; bail out gracefully when absent.
# NOTE(review): the find_package(CUDA)/if(NOT CUDA_FOUND)/message(WARNING
# opener and the closing return()/endif() were lost during extraction and have
# been restored — verify against upstream.
find_package(CUDA)
if(NOT CUDA_FOUND)
  message(WARNING
    "Caffe2: CUDA cannot be found. Depending on whether you are building "
    "Caffe2 or a Caffe2 dependent library, the next warning / error will "
    "give you more info.")
  set(CAFFE2_USE_CUDA OFF)
  return()
endif()
# Enable CUDA language support
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
# Pass clang as host compiler, which according to the docs
# Must be done before CUDA language is enabled, see
# https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
  set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}")
endif()
# NOTE(review): enable_language(CUDA) was lost during extraction and has been
# restored — the comments above say the host compiler must be set before the
# CUDA language is enabled, so it belongs here.
enable_language(CUDA)

# Default the CUDA standard to the C++ standard when the user did not set one.
if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X")
  set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
endif()
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# CMP0074 - find_package will respect <PackageName>_ROOT variables, so the
# CUDAToolkit_ROOT set above is honored by the find_package below.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0)
  cmake_policy(SET CMP0074 NEW)
endif()

find_package(CUDAToolkit REQUIRED)
# nvcc's own version and the found toolkit's version must agree; otherwise we
# would be compiling one toolkit's headers with another toolkit's nvcc.
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
  message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
          "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
          "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
endif()

message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
# Enforce the minimum supported toolkit version.
if(CUDA_VERSION VERSION_LESS 11.0)
  message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
endif()
# Sometimes, we may mismatch nvcc with the CUDA headers we are
# compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
# but the PATH is not consistent with CUDA_HOME. It's better safe
# than sorry: make sure everything is consistent.
if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
  # When using Visual Studio, it attempts to lock the whole binary dir when
  # `try_run` is called, which will cause the build to fail.
  string(RANDOM BUILD_SUFFIX)
  set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}/${BUILD_SUFFIX}")
else()
  set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
endif()

# Write a tiny program that prints the CUDA_VERSION its headers declare.
# NOTE(review): every line of this file(WRITE) except the printf line was lost
# during extraction and has been restored — verify against upstream.
set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
file(WRITE ${file} ""
  "#include <cuda.h>\n"
  "#include <cstdio>\n"
  "int main() {\n"
  "  printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
  "  return 0;\n"
  "}\n"
  )
if(NOT CMAKE_CROSSCOMPILING)
  try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
    LINK_LIBRARIES ${CUDA_LIBRARIES}
    RUN_OUTPUT_VARIABLE cuda_version_from_header
    COMPILE_OUTPUT_VARIABLE output_var
    )
  if(NOT compile_result)
    message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var})
  endif()
  message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header})
  if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
    # Force CUDA to be processed for again next time
    # TODO: I'm not sure if this counts as an implementation detail of
    # FindCUDA.
    # BUG FIX(review): this read set(${cuda_version_from_findcuda} ...), which
    # dereferences an undefined variable and would set a variable with an
    # empty name; the FATAL_ERROR below reads ${cuda_version_from_findcuda},
    # so the variable itself must be assigned.
    set(cuda_version_from_findcuda ${CUDA_VERSION_STRING})
    unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
    # Not strictly necessary, but for good luck.
    unset(CUDA_VERSION CACHE)
    message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
      "but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
      "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
      "non-standard locations, without also setting PATH to point to the correct nvcc. "
      "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
      "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
  endif()
endif()
# ---[ CUDA libraries wrapper

# Record the nvrtc library found by FindCUDA in the cache, then compute a
# short sha256 of the library file so builds can detect library swaps.
set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
  find_package(Python COMPONENTS Interpreter)
  # NOTE(review): the execute_process( opener and the else()/endif() closers
  # were lost during extraction and have been restored — verify.
  execute_process(
    COMMAND Python::Interpreter -c
    "import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
    RESULT_VARIABLE _retval
    OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
  if(NOT _retval EQUAL 0)
    # Fall back to a sentinel hash rather than failing the configure step.
    message(WARNING "Failed to compute shorthash for libnvrtc.so")
    set(CUDA_NVRTC_SHORTHASH "XXXXXXXX")
  else()
    string(STRIP "${CUDA_NVRTC_SHORTHASH}" CUDA_NVRTC_SHORTHASH)
    message(STATUS "${CUDA_NVRTC_LIB} shorthash is ${CUDA_NVRTC_SHORTHASH}")
  endif()
endif()
# Create new style imported libraries.
# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
# is set. This path is where sane CUDA installations have their static
# libraries installed. This flag should only be used for binary builds, so
# end-users should never have this flag set.

# NOTE(review): throughout this section the set_property( openers, the
# CUDA::* argument lines, and the else()/endif() lines were lost during
# extraction; they have been restored from the surrounding context — verify
# each against upstream pytorch cmake/public/cuda.cmake.

# cuda driver API
add_library(caffe2::cuda INTERFACE IMPORTED)
set_property(
    TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
    CUDA::cuda_driver)

# cudart: static only when building release binaries (see comment at top).
add_library(torch::cudart INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA)
  set_property(
      TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cudart_static)
else()
  set_property(
      TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cudart)
endif()

# NVTX3 (header-only, vendored in third_party) with legacy nvToolsExt fallback.
find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
if(nvtx3_FOUND)
  add_library(torch::nvtx3 INTERFACE IMPORTED)
  target_include_directories(torch::nvtx3 INTERFACE "${nvtx3_dir}")
  target_compile_definitions(torch::nvtx3 INTERFACE TORCH_CUDA_USE_NVTX3)
else()
  message(WARNING "Cannot find NVTX3, find old NVTX instead")
  add_library(torch::nvtoolsext INTERFACE IMPORTED)
  set_property(TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES CUDA::nvToolsExt)
endif()

# cublas
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
  set_property(
      TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
      # NOTE: cublas is always linked dynamically
      CUDA::cublas CUDA::cublasLt)
  set_property(
      TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cudart_static rt)
else()
  set_property(
      TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cublas CUDA::cublasLt)
endif()
# cuDNN
# static linking is handled by USE_STATIC_CUDNN environment variable
# NOTE(review): the if(CAFFE2_USE_CUDNN)/if(USE_STATIC_CUDNN) guards,
# find_package(CUDNN), and the else()/endif() lines in this section were lost
# during extraction and have been restored — verify against upstream.
if(CAFFE2_USE_CUDNN)
  if(USE_STATIC_CUDNN)
    set(CUDNN_STATIC ON CACHE BOOL "")
  else()
    set(CUDNN_STATIC OFF CACHE BOOL "")
  endif()

  find_package(CUDNN)

  if(NOT CUDNN_FOUND)
    message(WARNING
      "Cannot find cuDNN library. Turning the option off")
    set(CAFFE2_USE_CUDNN OFF)
  else()
    # Enforce the minimum supported cuDNN version.
    if(CUDNN_VERSION VERSION_LESS "8.1.0")
      message(FATAL_ERROR "PyTorch requires cuDNN 8.1 and above.")
    endif()
  endif()
endif()

if(CAFFE2_USE_CUDNN)
  add_library(torch::cudnn INTERFACE IMPORTED)
  target_include_directories(torch::cudnn INTERFACE ${CUDNN_INCLUDE_PATH})
  if(CUDNN_STATIC AND NOT WIN32)
    # Keep the static archive's symbols out of our exported symbol table.
    target_link_options(torch::cudnn INTERFACE
        "-Wl,--exclude-libs,libcudnn_static.a")
  else()
    target_link_libraries(torch::cudnn INTERFACE ${CUDNN_LIBRARY_PATH})
  endif()
else()
  message(STATUS "USE_CUDNN is set to 0. Compiling without cuDNN support")
endif()
# cuSPARSELt
if(CAFFE2_USE_CUSPARSELT)
  find_package(CUSPARSELT)

  if(NOT CUSPARSELT_FOUND)
    message(WARNING
      "Cannot find cuSPARSELt library. Turning the option off")
    set(CAFFE2_USE_CUSPARSELT OFF)
  else()
    add_library(torch::cusparselt INTERFACE IMPORTED)
    target_include_directories(torch::cusparselt INTERFACE ${CUSPARSELT_INCLUDE_PATH})
    target_link_libraries(torch::cusparselt INTERFACE ${CUSPARSELT_LIBRARY_PATH})
  endif()
else()
  message(STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
endif()

# cuDSS
# NOTE(review): the guard and find_package lines for cuDSS were lost during
# extraction and have been restored; verify the option name (USE_CUDSS) and
# found-variable (CUDSS_FOUND) against upstream.
if(USE_CUDSS)
  find_package(CUDSS)

  if(NOT CUDSS_FOUND)
    message(WARNING
      "Cannot find CUDSS library. Turning the option off")
    set(USE_CUDSS OFF)
  else()
    add_library(torch::cudss INTERFACE IMPORTED)
    target_include_directories(torch::cudss INTERFACE ${CUDSS_INCLUDE_PATH})
    target_link_libraries(torch::cudss INTERFACE ${CUDSS_LIBRARY_PATH})
  endif()
else()
  message(STATUS "USE_CUDSS is set to 0. Compiling without cuDSS support")
endif()
# NOTE(review): in this section the if(CAFFE2_USE_CUFILE) guard, the
# set_property( openers, the CUDA::* argument lines, and the else()/endif()
# lines were lost during extraction and have been restored — verify each
# against upstream pytorch cmake/public/cuda.cmake.

# cufile
if(CAFFE2_USE_CUFILE)
  add_library(torch::cufile INTERFACE IMPORTED)
  if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
    set_property(
        TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
        CUDA::cuFile_static)
  else()
    set_property(
        TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
        CUDA::cuFile)
  endif()
else()
  message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
endif()

# curand
add_library(caffe2::curand INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
  set_property(
      TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::curand_static)
else()
  set_property(
      TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::curand)
endif()

# cufft: the _nocallback static variant avoids pulling in callback support.
add_library(caffe2::cufft INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
  set_property(
      TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cufft_static_nocallback)
else()
  set_property(
      TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cufft)
endif()

# nvrtc: also needs the driver API wrapper defined above.
add_library(caffe2::nvrtc INTERFACE IMPORTED)
set_property(
    TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
    CUDA::nvrtc caffe2::cuda)
# Add onnx namepsace definition to nvcc
# NOTE(review): the if(ONNX_NAMESPACE)/else()/endif() structure around these
# two alternatives was lost during extraction and has been restored — verify.
if(ONNX_NAMESPACE)
  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=${ONNX_NAMESPACE}")
else()
  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=onnx_c2")
endif()

# Don't activate VC env again for Ninja generators with MSVC on Windows if CUDAHOSTCXX is not defined
# by adding --use-local-env.
if(MSVC AND CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DEFINED ENV{CUDAHOSTCXX})
  list(APPEND CUDA_NVCC_FLAGS "--use-local-env")
endif()

# setting nvcc arch flags
torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
set(CMAKE_CUDA_ARCHITECTURES OFF)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")

# disable some nvcc diagnostic that appears in boost, glog, glags, opencv, etc.
foreach(diag cc_clobber_ignored
             field_without_dll_interface
             base_class_has_different_dll_interface
             dll_interface_conflict_none_assumed
             dll_interface_conflict_dllexport_assumed
             # NOTE(review): at least one diag line was lost in extraction
             # here; upstream also suppresses e.g. bad_friend_decl — verify.
             bad_friend_decl)
  list(APPEND SUPPRESS_WARNING_FLAGS --diag_suppress=${diag})
endforeach()
# nvcc takes the suppressions as a single comma-separated -Xcudafe argument.
string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})

# Never let host -Werror leak into nvcc invocations.
set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")
# NOTE(review): the if(MSVC) guard around these two flags was lost during
# extraction and has been restored — verify.
if(MSVC)
  list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call")
  list(APPEND CUDA_NVCC_FLAGS "--no-host-device-move-forward")
endif()
# Debug and Release symbol support
# NOTE(review): the outer if(MSVC), the else() between the /MT and /MD blocks,
# and the closing endif()/endforeach() lines were lost during extraction and
# have been restored — verify against upstream.
if(MSVC)
  if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
    # Static MSVC runtime (/MT, /MTd for Debug).
    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT")
  else()
    # Dynamic MSVC runtime (/MD, /MDd for Debug).
    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd")
    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD")
    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD")
    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD")
  endif()
  if(CUDA_NVCC_FLAGS MATCHES "Zi")
    # /Zi PDB writes need /FS to serialize access from parallel compiles.
    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")
  endif()
elseif(CUDA_DEVICE_DEBUG)
  list(APPEND CUDA_NVCC_FLAGS "-g" "-G") # -G enables device code debugging symbols
endif()

# Set expt-relaxed-constexpr to suppress Eigen warnings
list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

# Set expt-extended-lambda to support lambda on device
list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda")

# Copy the accumulated nvcc flag list into CMAKE_CUDA_FLAGS, rejecting any
# entry that contains a space (a spaced entry would be re-split incorrectly).
foreach(FLAG ${CUDA_NVCC_FLAGS})
  string(FIND "${FLAG}" " " flag_space_position)
  if(NOT flag_space_position EQUAL -1)
    message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${FLAG}'")
  endif()
  string(APPEND CMAKE_CUDA_FLAGS " ${FLAG}")
endforeach()