# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.


  # Gather the CUDA execution provider sources.
  #   * Minimal build: only the top-level and tunable/ host sources (non-recursive)
  #     plus a single hand-picked .cu file.
  #   * Full build: everything under core/providers/cuda, recursively.
  if(onnxruntime_CUDA_MINIMAL)
    file(GLOB onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h"
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc"
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/tunable/*.h"
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/tunable/*.cc"
    )
    # Drop the files that are excluded from the minimal build.
    list(REMOVE_ITEM onnxruntime_providers_cuda_cc_srcs
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/integer_gemm.cc"
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/triton_kernel.h"
    )
    set(onnxruntime_providers_cuda_cu_srcs
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/math/unary_elementwise_ops_impl.cu"
    )
  else()
    file(GLOB_RECURSE onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h"
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cc"
    )
    file(GLOB_RECURSE onnxruntime_providers_cuda_cu_srcs CONFIGURE_DEPENDS
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cu"
      "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.cuh"
    )
  endif()

  # The precompiled-header files themselves must not be listed as regular sources.
  list(REMOVE_ITEM onnxruntime_providers_cuda_cc_srcs
    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
    "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
  )

  # shared_library sources live in their own list: they use precompiled headers,
  # whereas the files collected above have them disabled.
  file(GLOB_RECURSE onnxruntime_providers_cuda_shared_srcs CONFIGURE_DEPENDS
    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
    "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
  )

  # Combined source list for the provider, in the order: host sources, shared sources, CUDA sources.
  set(onnxruntime_providers_cuda_src
    ${onnxruntime_providers_cuda_cc_srcs}
    ${onnxruntime_providers_cuda_shared_srcs}
    ${onnxruntime_providers_cuda_cu_srcs}
  )
  source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_cuda_src})

  # Add the CUDA contrib ops to the provider sources, unless contrib ops are
  # disabled or this is a minimal CUDA build.
  # NOTE(review): onnxruntime_cuda_contrib_ops_cc_srcs / _cu_srcs are not populated in
  # this section; presumably they are set before this point — confirm.
  if(NOT (onnxruntime_DISABLE_CONTRIB_OPS OR onnxruntime_CUDA_MINIMAL))
    # Without NCCL, leave out the collective / distributed kernels.
    if(NOT onnxruntime_USE_NCCL)
      list(REMOVE_ITEM onnxruntime_cuda_contrib_ops_cc_srcs
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/nccl_kernels.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/sharded_moe.h"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/sharded_moe.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/sharding_spec.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/sharding.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_matmul.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_slice.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reshape.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_expand.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reduce.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_unsqueeze.cc"
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_squeeze.cc"
      )
    endif()

    # Without ATen support, leave out the ATen op.
    if(NOT onnxruntime_ENABLE_ATEN)
      list(REMOVE_ITEM onnxruntime_cuda_contrib_ops_cc_srcs
        "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/aten_ops/aten_op.cc"
      )
    endif()

    # Root the source group at ONNXRUNTIME_ROOT so the files show up under the
    # 'contrib_ops' folder in Visual Studio.
    source_group(TREE ${ONNXRUNTIME_ROOT} FILES
      ${onnxruntime_cuda_contrib_ops_cc_srcs}
      ${onnxruntime_cuda_contrib_ops_cu_srcs}
    )
    list(APPEND onnxruntime_providers_cuda_src
      ${onnxruntime_cuda_contrib_ops_cc_srcs}
      ${onnxruntime_cuda_contrib_ops_cu_srcs}
    )
  endif()

  # Training ops: add every CUDA training op source, then prune the ones that do
  # not apply to the current configuration.
  if(onnxruntime_ENABLE_TRAINING_OPS)
    file(GLOB_RECURSE onnxruntime_cuda_training_ops_cc_srcs CONFIGURE_DEPENDS
      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.h"
      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cc"
    )
    file(GLOB_RECURSE onnxruntime_cuda_training_ops_cu_srcs CONFIGURE_DEPENDS
      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cu"
      "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/*.cuh"
    )

    source_group(TREE ${ORTTRAINING_ROOT} FILES
      ${onnxruntime_cuda_training_ops_cc_srcs}
      ${onnxruntime_cuda_training_ops_cu_srcs}
    )
    list(APPEND onnxruntime_providers_cuda_src
      ${onnxruntime_cuda_training_ops_cc_srcs}
      ${onnxruntime_cuda_training_ops_cu_srcs}
    )

    if(onnxruntime_ENABLE_TRAINING)
      if(WIN32 OR NOT onnxruntime_USE_NCCL)
        # NCCL is not supported in Windows builds, so drop the NCCL-based ops there
        # as well as whenever NCCL is switched off.
        file(GLOB_RECURSE onnxruntime_cuda_nccl_op_srcs
          "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/nccl_common.cc"
          "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/nccl_kernels.cc"
          "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/megatron.cc"
        )
        list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_nccl_op_srcs})
      endif()
    else()
      # Training ops without full training: remove the sources that are only
      # built when onnxruntime_ENABLE_TRAINING is on.
      file(GLOB_RECURSE onnxruntime_cuda_full_training_only_srcs
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/*.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/collective/*.h"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/communication/*.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/communication/*.h"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/record.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/record.h"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.h"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/yield.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.h"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cu"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.cc"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.h"
        "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/triton/triton_op.cc"
      )
      list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_full_training_only_srcs})
    endif()
  endif()

  # For a reduced-ops build, hand the source list to substitute_op_reduction_srcs.
  if(onnxruntime_REDUCED_OPS_BUILD)
    substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
  endif()

  # Create the provider module.
  if(NOT onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
    onnxruntime_add_shared_library_module(onnxruntime_providers_cuda
      ${onnxruntime_providers_cuda_src}
    )
  else()
    # With internal tests enabled, cuda_provider_interface.cc is taken out of the
    # object target (onnxruntime_providers_cuda_obj) and added to the
    # onnxruntime_providers_cuda library on its own. That way
    # onnxruntime_providers_cuda_ut can share every object file with
    # onnxruntime_providers_cuda except cuda_provider_interface.cc.
    set(cuda_provider_interface_src ${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_provider_interface.cc)
    list(REMOVE_ITEM onnxruntime_providers_cuda_src ${cuda_provider_interface_src})
    onnxruntime_add_object_library(onnxruntime_providers_cuda_obj
      ${onnxruntime_providers_cuda_src}
    )
    onnxruntime_add_shared_library_module(onnxruntime_providers_cuda
      ${cuda_provider_interface_src}
      $<TARGET_OBJECTS:onnxruntime_providers_cuda_obj>
    )
  endif()
  # config_cuda_provider_shared_module(<target>)
  #
  # Applies the CUDA provider's compile options, compile definitions, include
  # directories, link libraries and target properties to <target>.
  #
  # It can be used to configure onnxruntime_providers_cuda_obj, onnxruntime_providers_cuda
  # and onnxruntime_providers_cuda_ut; routing all three through this one function
  # guarantees that they share the same configuration.
  #
  # Arguments:
  #   target - name of an already-created target to configure.
  function(config_cuda_provider_shared_module target)
    if (onnxruntime_REDUCED_OPS_BUILD)
      add_op_reduction_include_dirs(${target})
    endif()

    # Flags below are wrapped in $<COMPILE_LANGUAGE:CUDA> and passed with -Xcompiler,
    # i.e. they are handed by nvcc to the host compiler for CUDA sources only.
    if (HAS_GUARD_CF)
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /guard:cf>")
    endif()

    if (HAS_QSPECTRE)
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /Qspectre>")
    endif()

    # Forward every warning flag in ORT_WARNING_FLAGS to the host compiler for CUDA sources.
    foreach(ORT_FLAG ${ORT_WARNING_FLAGS})
        target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler \"${ORT_FLAG}\">")
    endforeach()

    # CUDA 11.3+ supports parallel compilation
    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-guiding-compiler-driver-threads
    if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3)
      # Cache entry, so a value supplied by the user takes precedence over the default of "1".
      set(onnxruntime_NVCC_THREADS "1" CACHE STRING "Number of threads that NVCC can use for compilation.")
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--threads \"${onnxruntime_NVCC_THREADS}\">")
    endif()

    if (UNIX)
      # Each warning tweak is given twice: via -Xcompiler for CUDA sources and directly for everything else.
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-reorder>"
                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-reorder>")
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-error=sign-compare>"
                  "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-error=sign-compare>")
    else()
      #mutex.cuh(91): warning C4834: discarding return value of function with 'nodiscard' attribute
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4834>")
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /wd4127>")
      if (MSVC)
        # the VS warnings for 'Conditional Expression is Constant' are spurious as they don't handle multiple conditions
        # e.g. `if (std::is_same_v<T, float> && not_a_const)` will generate the warning even though constexpr cannot
        # be used due to `&& not_a_const`. This affects too many places for it to be reasonable to disable at a finer
        # granularity.
        target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:/wd4127>")
      endif()
    endif()

    if(MSVC)
      target_compile_options(${target} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /Zc:__cplusplus>")
    endif()

    onnxruntime_add_include_to_target(${target} onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers)
    if (onnxruntime_ENABLE_TRAINING_OPS)
      onnxruntime_add_include_to_target(${target} onnxruntime_training)
      if (onnxruntime_ENABLE_TRAINING)
        target_link_libraries(${target} PRIVATE onnxruntime_training)
      endif()
      if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
        onnxruntime_add_include_to_target(${target} Python::Module)
      endif()
    endif()

    add_dependencies(${target} onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
    if(onnxruntime_CUDA_MINIMAL)
      # Minimal build: only the CUDA runtime is linked, none of cuBLAS/cuDNN/cuRAND/cuFFT.
      target_compile_definitions(${target} PRIVATE USE_CUDA_MINIMAL)
      target_link_libraries(${target} PRIVATE ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface CUDA::cudart)
    else()
      include(cudnn_frontend) # also defines CUDNN::*
      if (onnxruntime_USE_CUDA_NHWC_OPS)
        if(CUDNN_MAJOR_VERSION GREATER 8)
          # NOTE(review): add_compile_definitions is directory-scoped, so this definition is
          # not limited to ${target} — confirm that other targets rely on it before narrowing.
          add_compile_definitions(ENABLE_CUDA_NHWC_OPS)
        else()
          message( WARNING "To compile with NHWC ops enabled please compile against cuDNN 9 or newer." )
        endif()
      endif()
      target_link_libraries(${target} PRIVATE CUDA::cublasLt CUDA::cublas CUDNN::cudnn_all cudnn_frontend CUDA::curand CUDA::cufft CUDA::cudart
              ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
    endif()

    if (onnxruntime_USE_TRITON_KERNEL)
      # compile triton kernel, generate .a and .h files
      include(onnxruntime_compile_triton_kernel.cmake)
      compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
      add_dependencies(${target} onnxruntime_triton_kernel)
      target_compile_definitions(${target} PRIVATE USE_TRITON_KERNEL)
      target_include_directories(${target} PRIVATE ${triton_kernel_header_dir})
      # The archive is bracketed by --whole-archive / --no-whole-archive so that all of it is linked in.
      target_link_libraries(${target} PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
      # lib cuda needed by cuLaunchKernel
      target_link_libraries(${target} PRIVATE CUDA::cuda_driver)
    endif()

    include(cutlass)
    target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples ${cutlass_SOURCE_DIR}/tools/util/include)

    target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}  ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES}
     PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
    # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
    set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CUDA)
    set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntime")

    if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
      target_link_libraries(${target} PRIVATE CUDA::cupti)
    endif()

    if (onnxruntime_ENABLE_NVTX_PROFILE)
      target_link_libraries(${target} PRIVATE CUDA::nvtx3)
    endif()

    if (onnxruntime_ENABLE_TRAINING_OPS)
      target_include_directories(${target} PRIVATE ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS})
    endif()

    if(onnxruntime_USE_MPI)
      target_link_libraries(${target} PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
    endif()

    if (onnxruntime_USE_NCCL)
      target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIRS})
      target_link_libraries(${target} PRIVATE ${NCCL_LIBRARIES})
    endif()

    if (WIN32)
      # *.cu cannot use PCH
      if (NOT onnxruntime_BUILD_CACHE)
        target_precompile_headers(${target} PUBLIC
          "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
          "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
        )
      endif()

      # minimize the Windows includes.
      # this avoids an issue with CUDA 11.6 where 'small' is defined in the windows and cuda headers.
      target_compile_definitions(${target} PRIVATE "WIN32_LEAN_AND_MEAN")

      # disable a warning from the CUDA headers about unreferenced local functions
      #target_compile_options(${target} PRIVATE /wd4505)
      set(onnxruntime_providers_cuda_static_library_flags
          -IGNORE:4221 # LNK4221: This object file does not define any previously undefined public symbols, so it will not be used by any link operation that consumes this library
      )
      set_target_properties(${target} PROPERTIES
          STATIC_LIBRARY_FLAGS "${onnxruntime_providers_cuda_static_library_flags}")
    endif()

    # Restrict the exported symbols with the platform's export list.
    # APPEND_STRING concatenates with no separator, so every appended value starts with a
    # space; otherwise it would fuse with whatever LINK_FLAGS already contains.
    if(APPLE)
      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/cuda/exported_symbols.lst")
      target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
    elseif(UNIX)
      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/cuda/version_script.lds -Xlinker --gc-sections")
      target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
    elseif(WIN32)
      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -DEF:${ONNXRUNTIME_ROOT}/core/providers/cuda/symbols.def")
    else()
      message(FATAL_ERROR "${target} unknown platform, need to specify shared library exports for it")
    endif()

    if (onnxruntime_ENABLE_ATEN)
      target_compile_definitions(${target} PRIVATE ENABLE_ATEN)
    endif()
  endfunction()
  # Configure the object library first (it only exists when internal tests are
  # enabled), then the provider module itself.
  if(onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
    config_cuda_provider_shared_module(onnxruntime_providers_cuda_obj)
  endif()
  config_cuda_provider_shared_module(onnxruntime_providers_cuda)

  # The public headers are listed by hand rather than globbed, because
  # cuda_provider_options.h must not be exposed.
  set(ONNXRUNTIME_CUDA_PROVIDER_PUBLIC_HEADERS
    "${REPO_ROOT}/include/onnxruntime/core/providers/cuda/cuda_context.h"
    "${REPO_ROOT}/include/onnxruntime/core/providers/cuda/cuda_resource.h"
  )
  set_property(TARGET onnxruntime_providers_cuda
    PROPERTY PUBLIC_HEADER ${ONNXRUNTIME_CUDA_PROVIDER_PUBLIC_HEADERS}
  )

  install(
    TARGETS onnxruntime_providers_cuda
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers/cuda
  )
onnxruntime

Использование cookies