# This ill-named file does a number of things:
# - Installs Caffe2 header files (this has nothing to do with code generation)
# - Configures caffe2/core/macros.h
# - Creates an ATen target for its generated C++ files and adds it
# - Reads build lists defined in build_variables.bzl
################################################################################

################################################################################
# filter_list(<output> <input> <regex>...)
# Keep only the entries of the list variable named by `input` that match at
# least one of the regex patterns given in ARGN, and store the surviving
# entries in the variable named by `output` in the caller's scope.
# An entry matching several patterns is appended once per matching pattern,
# mirroring the original nested-loop structure.
# NOTE(review): reconstructed — the extracted text contained "$(unknown)"
# placeholders (originally ${filename}) and was missing all end markers.
function(filter_list output input)
  foreach(filename ${${input}})
    foreach(pattern ${ARGN})
      if("${filename}" MATCHES "${pattern}")
        list(APPEND result "${filename}")
      endif()
    endforeach()
  endforeach()
  set(${output} ${result} PARENT_SCOPE)
endfunction()
# filter_list_exclude(<output> <input> <regex>...)
# Keep only the entries of the list variable named by `input` that match
# NONE of the regex patterns given in ARGN; store the result in the variable
# named by `output` in the caller's scope.
# NOTE(review): reconstructed from a corrupted span. The visible structure
# appended inside the pattern loop, which with more than one pattern would
# both duplicate entries and fail to exclude files matching only one
# pattern; rewritten to exclude on any match (identical behavior for the
# single-pattern case).
function(filter_list_exclude output input)
  foreach(filename ${${input}})
    set(_excluded FALSE)
    foreach(pattern ${ARGN})
      if("${filename}" MATCHES "${pattern}")
        set(_excluded TRUE)
      endif()
    endforeach()
    if(NOT _excluded)
      list(APPEND result "${filename}")
    endif()
  endforeach()
  set(${output} ${result} PARENT_SCOPE)
endfunction()
################################################################################

# ---[ Write the macros file
# NOTE(review): reconstructed — the configure_file( opener, the DESTINATION
# for the caffe2 install, and the endif() were missing from the extraction.
configure_file(
    ${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
    ${CMAKE_BINARY_DIR}/caffe2/core/macros.h)

# ---[ Installing the header files
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
        DESTINATION include
        FILES_MATCHING PATTERN "*.h")
# Presumably the ATen build installs these itself when INTERN_BUILD_ATEN_OPS
# is on — TODO(review) confirm against the rest of the build.
if(NOT INTERN_BUILD_ATEN_OPS)
  install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
          DESTINATION include/ATen
          FILES_MATCHING PATTERN "*.h")
endif()
# Install the configured macros header generated above.
install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
        DESTINATION include/caffe2/core)
# NOTE(review): Everything below is the ATen code-generation setup. The
# extracted text is heavily truncated: the bare integer lines are line
# numbers leaked from the original file, and several command openers and
# closers (if()/endif(), set(...), execute_process(...),
# add_custom_command(...)) are missing. Code is left byte-identical here;
# comments only. Recover the full text from version control before editing.
if(INTERN_BUILD_ATEN_OPS)
58
# MSVC-style strict-FP flag; the non-MSVC branch (missing from this
# extraction) presumably sets a different OPT_FLAG — TODO confirm.
set(OPT_FLAG "/fp:strict ")
61
if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
66
if(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
67
set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp")
70
# All torchgen python sources; used below as codegen DEPENDS.
file(GLOB_RECURSE all_python "${CMAKE_CURRENT_LIST_DIR}/../torchgen/*.py")
72
# RowwiseScaled.cu requires sm90a flags
74
set(ROWWISE_SCALED_MM_FILE "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu")
76
# Get existing arch flags
77
torch_cuda_get_nvcc_gencode_flag(EXISTING_ARCH_FLAGS)
79
# Check NVCC version and existing arch flags
80
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND
81
EXISTING_ARCH_FLAGS MATCHES ".*compute_90.*")
82
set_source_files_properties(${ROWWISE_SCALED_MM_FILE}
83
PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a")
89
set(GEN_ROCM_FLAG --rocm)
94
set(GEN_MPS_FLAG --mps)
97
# Flags controlling selective/custom builds, passed to torchgen below.
set(CUSTOM_BUILD_FLAGS)
98
if(INTERN_BUILD_MOBILE)
100
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU Vulkan)
102
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU)
108
message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
109
list(APPEND CUSTOM_BUILD_FLAGS
110
--op_selection_yaml_path ${SELECTED_OP_LIST})
111
elseif(NOT STATIC_DISPATCH_BACKEND)
113
# NOTE(review): the two strings below look like arguments of a message(...)
# call whose opener is missing from the extraction.
"You have to run tracing-based selective build with dynamic dispatch.\n"
114
"Switching to STATIC_DISPATCH_BACKEND=CPU."
116
set(STATIC_DISPATCH_BACKEND CPU)
120
if(STATIC_DISPATCH_BACKEND)
121
message(STATUS "Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}")
122
list(LENGTH STATIC_DISPATCH_BACKEND len)
123
list(APPEND CUSTOM_BUILD_FLAGS
124
--static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
128
# Lightweight dispatch: generate unboxing wrappers via tools.jit.gen_unboxing.
if(USE_LIGHTWEIGHT_DISPATCH)
129
file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
130
list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
131
set(GEN_UNBOXING_COMMAND
132
"${Python_EXECUTABLE}" -m tools.jit.gen_unboxing
133
--source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
134
--install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
137
list(APPEND GEN_UNBOXING_COMMAND
138
--TEST_ONLY_op_registration_allowlist_yaml_path "${SELECTED_OP_LIST}")
140
set("GEN_UNBOXING_COMMAND_sources"
141
${GEN_UNBOXING_COMMAND}
142
--output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
144
message(STATUS "Generating sources for lightweight dispatch")
146
# NOTE(review): the execute_process( opener for this dry run is missing.
COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
147
RESULT_VARIABLE RETURN_VALUE
148
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
150
if(NOT RETURN_VALUE EQUAL 0)
151
message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
154
# The dry run wrote the output list; include it so that
# ${generated_unboxing_sources} is defined below.
include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
156
# NOTE(review): the add_custom_command(OUTPUT ...) opener is missing here.
COMMENT "Generating ATen unboxing sources"
158
${generated_unboxing_sources}
159
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
160
COMMAND ${GEN_UNBOXING_COMMAND_sources}
161
DEPENDS ${all_unboxing_script} ${sources_templates}
162
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
163
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
164
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
166
else() # Otherwise do not generate or include sources into build.
167
set(generated_unboxing_sources "")
170
set(GEN_PER_OPERATOR_FLAG)
171
if(USE_PER_OPERATOR_HEADERS)
172
list(APPEND GEN_PER_OPERATOR_FLAG "--per-operator-headers")
176
# NOTE(review): the lines below are arguments of the main torchgen command
# set(...) whose opener is missing from the extraction.
"${Python_EXECUTABLE}" -m torchgen.gen
177
--source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
178
--install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
179
${GEN_PER_OPERATOR_FLAG}
182
${CUSTOM_BUILD_FLAGS}
185
# For each generated artifact kind (headers/sources/declarations_yaml):
# build the torchgen command line, dry-run it to learn the output file
# lists, include those lists, then register the real codegen rule.
# NOTE(review): truncated extraction — bare integer lines are leaked line
# numbers and several command openers/end markers are missing. Code left
# byte-identical; comments only.
file(GLOB_RECURSE headers_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
186
file(GLOB_RECURSE sources_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
187
set(declarations_yaml_templates "")
189
foreach(gen_type "headers" "sources" "declarations_yaml")
190
# The codegen outputs may change dynamically as PyTorch is
191
# developed, but add_custom_command only supports dynamic inputs.
193
# We work around this by generating a .cmake file which is
194
# included below to set the list of output files. If that file
195
# ever changes then cmake will be re-run automatically because it
196
# was included and so we get fully dynamic outputs.
198
set("GEN_COMMAND_${gen_type}"
200
--generate ${gen_type}
201
--output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
204
# Dry run to bootstrap the output variables
206
COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
207
RESULT_VARIABLE RETURN_VALUE
208
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
211
if(NOT RETURN_VALUE EQUAL 0)
212
message(FATAL_ERROR "Failed to get generated_${gen_type} list")
215
# Pull in the output lists the dry run produced (generated_*,
# core_generated_*, cpu_vec_generated_*, cuda_generated_*, ops_generated_*).
include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake")
216
include("${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
217
include("${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
218
include("${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
219
include("${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake")
221
message(STATUS "${gen_type} outputs: ${gen_outputs}")
224
# NOTE(review): the add_custom_command(OUTPUT ...) opener for the real
# codegen rule is missing from the extraction.
COMMENT "Generating ATen ${gen_type}"
226
${generated_${gen_type}}
227
${cuda_generated_${gen_type}}
228
${core_generated_${gen_type}}
229
${cpu_vec_generated_${gen_type}}
230
${ops_generated_${gen_type}}
231
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
232
${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
233
${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
234
${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
235
${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake
236
COMMAND ${GEN_COMMAND_${gen_type}}
237
DEPENDS ${all_python} ${${gen_type}_templates}
238
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
239
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
240
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
244
# Generated headers used from a CUDA (.cu) file are
245
# not tracked correctly in CMake. We make the libATen.so depend explicitly
246
# on building the generated ATen files to workaround.
247
add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
248
${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
249
${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
250
${generated_declarations_yaml} ${generated_unboxing_sources})
251
add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
252
${cuda_generated_headers} ${cuda_generated_sources})
253
# INTERFACE libraries other targets link against so they pick up a build
# dependency on the generated files (via add_dependencies below).
add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
254
add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE)
255
add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET)
256
add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET)
258
if(USE_PER_OPERATOR_HEADERS)
259
target_compile_definitions(ATEN_CPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
260
target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
263
# Handle source files that need to be compiled multiple times for
264
# different vectorization options
265
file(GLOB cpu_kernel_cpp_in "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp" "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")
267
# Parallel lists: CPU_CAPABILITY_NAMES[i] is compiled with
# CPU_CAPABILITY_FLAGS[i]; DEFAULT is always present.
# NOTE(review): truncated extraction — the if(CXX_AVX512_FOUND)/if(MSVC)/
# if(CXX_AVX2_FOUND)/VSX openers and several else()/endif() lines are
# missing (orphaned endif(...) lines below confirm this). Code left
# byte-identical; comments only.
list(APPEND CPU_CAPABILITY_NAMES "DEFAULT")
268
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}")
271
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX512_CPU_DEFINITION")
272
list(APPEND CPU_CAPABILITY_NAMES "AVX512")
274
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
276
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
278
endif(CXX_AVX512_FOUND)
281
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION")
283
# Some versions of GCC pessimistically split unaligned load and store
284
# instructions when using the default tuning. This is a bad choice on
285
# new Intel and AMD processors so we disable it when compiling with AVX2.
286
# See https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
287
check_cxx_compiler_flag("-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" COMPILER_SUPPORTS_NO_AVX256_SPLIT)
288
if(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
289
set(CPU_NO_AVX256_SPLIT_FLAGS "-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
290
endif(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
292
list(APPEND CPU_CAPABILITY_NAMES "AVX2")
293
# ATEN_AVX512_256=TRUE: build the AVX2 kernels with 512-bit-capable flags
# so they can use 32 ymm registers. Read from the environment at configure
# time only.
if(DEFINED ENV{ATEN_AVX512_256})
294
if($ENV{ATEN_AVX512_256} MATCHES "TRUE")
296
message("-- ATen AVX2 kernels will use 32 ymm registers")
298
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
300
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
302
endif(CXX_AVX512_FOUND)
306
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX2")
308
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
311
endif(CXX_AVX2_FOUND)
314
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
315
LIST(APPEND CPU_CAPABILITY_NAMES "VSX")
316
LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
319
if(CXX_ZVECTOR_FOUND)
320
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
321
LIST(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
322
LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
323
endif(CXX_ZVECTOR_FOUND)
325
list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
326
# Convert count to highest index: foreach(RANGE) is inclusive of the bound.
math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")
328
# The sources list might get reordered later based on the capabilites.
329
# See NOTE [ Linking AVX and non-AVX files ]
330
foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
331
# process_vec(<NAME>)
# Register one compilation of a CPU kernel source for the current
# capability. NAME is the source path relative to aten/src/ATen; a wrapper
# .cpp that #includes the real source is generated in the binary dir (via
# IncludeSource.cpp.in, which reads NAME) and given the capability-specific
# compile flags. Runs inside the enclosing
# foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES}) loop and reads i,
# CPU_CAPABILITY_NAMES and CPU_CAPABILITY_FLAGS from that scope at call time.
# NOTE(review): reconstructed — the extraction was missing the if(MSVC)
# opener, else()/endif() lines and the endfunction().
function(process_vec NAME)
  list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
  set(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
  configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in" ${NEW_IMPL})
  set(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp} PARENT_SCOPE) # Create list of copies
  list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
  # MSVC uses /D for preprocessor definitions; everything else uses -D.
  if(MSVC)
    set(EXTRA_FLAGS "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}")
  else()
    set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
  endif()
  # Disable certain warnings for GCC-9.X
  if(CMAKE_COMPILER_IS_GNUCXX)
    if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
      # See https://github.com/pytorch/pytorch/issues/38855
      set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-uninitialized")
    endif()
    if("${NAME}" STREQUAL "native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
      # See https://github.com/pytorch/pytorch/issues/38854
      set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-deprecated-copy")
    endif()
  endif()
  set_source_files_properties(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
endfunction()
# Instantiate a per-capability copy of every hand-written CPU kernel...
# NOTE(review): reconstructed — the endforeach() markers (including the one
# closing the enclosing foreach(i ...) loop) were missing from the
# extraction.
foreach(IMPL ${cpu_kernel_cpp_in})
  file(RELATIVE_PATH NAME "${PROJECT_SOURCE_DIR}/aten/src/ATen/" "${IMPL}")
  process_vec("${NAME}")
endforeach()
# ...and of every generated vectorized source.
foreach(IMPL ${cpu_vec_generated_sources})
  file(RELATIVE_PATH NAME "${CMAKE_BINARY_DIR}/aten/src/ATen/" "${IMPL}")
  process_vec("${NAME}")
endforeach()
endforeach() # closes foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES}) above

# Collect all the per-capability wrapper sources into the ATen CPU build.
list(APPEND ATen_CPU_SRCS ${cpu_kernel_cpp})
# append_filelist(<name> <outputvar>)
# Evaluate build_variables.bzl with Python, read the list variable `name`
# defined there, prefix every entry with ${Torch_SOURCE_DIR}/, and append
# the result to `outputvar` in the caller's scope. Aborts configuration if
# the Python invocation fails.
# NOTE(review): reconstructed — the configure_file(/execute_process(
# openers, endif() and endfunction() were missing from the extraction.
function(append_filelist name outputvar)
  set(_rootdir "${Torch_SOURCE_DIR}/")
  # configure_file adds its input to the list of CMAKE_RERUN dependencies
  configure_file(
      ${PROJECT_SOURCE_DIR}/build_variables.bzl
      ${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
  execute_process(
    COMMAND "${Python_EXECUTABLE}" -c
    "exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
    WORKING_DIRECTORY "${_rootdir}"
    RESULT_VARIABLE _retval
    OUTPUT_VARIABLE _tempvar)
  if(NOT _retval EQUAL 0)
    message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
  endif()
  # The python print ends with a newline; strip it so the list is clean.
  string(REPLACE "\n" "" _tempvar "${_tempvar}")
  list(APPEND ${outputvar} ${_tempvar})
  set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
endfunction()
# Export the capability count and flags to the parent scope.
# NOTE(review): PARENT_SCOPE here implies these lines sit inside a function
# or other nested scope not visible in this extraction — confirm against
# the full file before editing.
set(NUM_CPU_CAPABILITY_NAMES ${NUM_CPU_CAPABILITY_NAMES} PARENT_SCOPE)
388
set(CPU_CAPABILITY_FLAGS ${CPU_CAPABILITY_FLAGS} PARENT_SCOPE)