# pytorch / BUILD.bazel
# (Code-viewer metadata: 1994 lines, 64.5 KB; "word wrap" toggle.)
1
load("@bazel_skylib//lib:paths.bzl", "paths")
2
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
3
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
4
load("@rules_python//python:defs.bzl", "py_library", "py_test")
5
load("@pytorch//third_party:substitution.bzl", "header_template_rule", "template_rule")
6
load("@pytorch//:tools/bazel.bzl", "rules")
7
load("@pytorch//tools/rules:cu.bzl", "cu_library")
8
load("@pytorch//tools/config:defs.bzl", "if_cuda")
9
load("@pytorch//:aten.bzl", "generate_aten", "intern_build_aten_ops")
10
load(":build.bzl", "GENERATED_AUTOGRAD_CPP", "GENERATED_AUTOGRAD_PYTHON", "define_targets")
11
load(":build_variables.bzl", "jit_core_sources", "lazy_tensor_ts_sources", "libtorch_core_sources", "libtorch_cuda_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "libtorch_python_core_sources", "torch_cpp_srcs", "libtorch_python_cuda_sources", "libtorch_python_distributed_sources")
12
load(":ufunc_defs.bzl", "aten_ufunc_generated_cpu_kernel_sources", "aten_ufunc_generated_cpu_sources", "aten_ufunc_generated_cuda_sources")
13
load("//:tools/bazel.bzl", "rules")
14

15
# Call define_targets (loaded from :build.bzl), handing it the `rules`
# object loaded from tools/bazel.bzl.
define_targets(rules = rules)
16

17
# Preprocessor defines shared by the option lists in this file:
# ATEN_COPTS and CAFFE2_COPTS are both built as COMMON_COPTS + [...].
# The second list is wrapped in if_cuda() (loaded from
# tools/config:defs.bzl) and carries the CUDA-specific defines.
COMMON_COPTS = [
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DHAVE_MMAP=1",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-D_FILE_OFFSET_BITS=64",
    "-DUSE_FBGEMM",
    "-DUSE_DISTRIBUTED",
    "-DAT_PER_OPERATOR_HEADERS",
    "-DATEN_THREADING=NATIVE",
    "-DNO_CUDNN_DESTROY_HANDLE",
] + if_cuda([
    "-DUSE_CUDA",
    "-DUSE_CUDNN",
    # TODO: This should be passed only when building for CUDA-11.5 or newer
    # use cub in a safe manner, see:
    # https://github.com/pytorch/pytorch/pull/55292
    "-DCUB_WRAPPED_NAMESPACE=at_cuda_detail",
])
36

37
# Inputs handed to the generate_aten() rule below as `srcs`: the two YAML
# operator descriptions plus everything under the ATen templates directory.
aten_generation_srcs = [
    "aten/src/ATen/native/native_functions.yaml",
    "aten/src/ATen/native/tags.yaml",
] + glob(["aten/src/ATen/templates/**"])
38

39
# Generated CPU-side ATen sources and headers. The list is used twice in
# this file: as part of the `outs` of generate_aten(name = "generated_aten_cpp")
# and as part of the `srcs` of cc_library(name = "aten").
generated_cpu_cpp = [
    "aten/src/ATen/RegisterBackendSelect.cpp",
    "aten/src/ATen/RegisterCPU.cpp",
    "aten/src/ATen/RegisterFunctionalization_0.cpp",
    "aten/src/ATen/RegisterFunctionalization_1.cpp",
    "aten/src/ATen/RegisterFunctionalization_2.cpp",
    "aten/src/ATen/RegisterFunctionalization_3.cpp",
    # "aten/src/ATen/RegisterFunctionalizationEverything.cpp",
    "aten/src/ATen/RegisterMkldnnCPU.cpp",
    "aten/src/ATen/RegisterNestedTensorCPU.cpp",
    "aten/src/ATen/RegisterQuantizedCPU.cpp",
    "aten/src/ATen/RegisterSparseCPU.cpp",
    "aten/src/ATen/RegisterSparseCsrCPU.cpp",
    "aten/src/ATen/RegisterZeroTensor.cpp",
    "aten/src/ATen/RegisterCompositeImplicitAutograd.cpp",
    "aten/src/ATen/RegisterCompositeImplicitAutogradNestedTensor.cpp",
    "aten/src/ATen/RegisterCompositeExplicitAutograd.cpp",
    "aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp",
    "aten/src/ATen/RegisterMeta.cpp",
    "aten/src/ATen/RegisterSparseMeta.cpp",
    "aten/src/ATen/RegisterQuantizedMeta.cpp",
    "aten/src/ATen/RegisterNestedTensorMeta.cpp",
    "aten/src/ATen/RegisterSchema.cpp",
    "aten/src/ATen/CPUFunctions.h",
    "aten/src/ATen/CPUFunctions_inl.h",
    "aten/src/ATen/CompositeExplicitAutogradFunctions.h",
    "aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
    "aten/src/ATen/CompositeExplicitAutogradNonFunctionalFunctions.h",
    "aten/src/ATen/CompositeExplicitAutogradNonFunctionalFunctions_inl.h",
    "aten/src/ATen/CompositeImplicitAutogradFunctions.h",
    "aten/src/ATen/CompositeImplicitAutogradFunctions_inl.h",
    "aten/src/ATen/CompositeImplicitAutogradNestedTensorFunctions.h",
    "aten/src/ATen/CompositeImplicitAutogradNestedTensorFunctions_inl.h",
    "aten/src/ATen/CompositeViewCopyKernels.cpp",
    "aten/src/ATen/FunctionalInverses.h",
    "aten/src/ATen/Functions.h",
    "aten/src/ATen/Functions.cpp",
    "aten/src/ATen/RedispatchFunctions.h",
    "aten/src/ATen/Operators.h",
    "aten/src/ATen/Operators_0.cpp",
    "aten/src/ATen/Operators_1.cpp",
    "aten/src/ATen/Operators_2.cpp",
    "aten/src/ATen/Operators_3.cpp",
    "aten/src/ATen/Operators_4.cpp",
    "aten/src/ATen/NativeFunctions.h",
    "aten/src/ATen/MetaFunctions.h",
    "aten/src/ATen/MetaFunctions_inl.h",
    "aten/src/ATen/MethodOperators.h",
    "aten/src/ATen/NativeMetaFunctions.h",
    "aten/src/ATen/RegistrationDeclarations.h",
    "aten/src/ATen/VmapGeneratedPlumbing.h",
    "aten/src/ATen/core/aten_interned_strings.h",
    "aten/src/ATen/core/enum_tag.h",
    "aten/src/ATen/core/TensorBody.h",
    "aten/src/ATen/core/TensorMethods.cpp",
    "aten/src/ATen/core/ATenOpList.cpp",
]
96

97
# Generated CUDA-side ATen sources and headers. Used in the `outs` of
# generate_aten(name = "generated_aten_cpp") and in the `srcs` of
# cc_library(name = "aten_cuda_cpp").
generated_cuda_cpp = [
    "aten/src/ATen/CUDAFunctions.h",
    "aten/src/ATen/CUDAFunctions_inl.h",
    "aten/src/ATen/RegisterCUDA.cpp",
    "aten/src/ATen/RegisterNestedTensorCUDA.cpp",
    "aten/src/ATen/RegisterQuantizedCUDA.cpp",
    "aten/src/ATen/RegisterSparseCUDA.cpp",
    "aten/src/ATen/RegisterSparseCsrCUDA.cpp",
]
106

107
# Code-generation target: runs the //torchgen:gen generator over
# aten_generation_srcs. The declared outputs are the CPU and CUDA lists
# above, the three ufunc-generated source lists, and Declarations.yaml.
generate_aten(
    name = "generated_aten_cpp",
    srcs = aten_generation_srcs,
    outs = (
        generated_cpu_cpp +
        generated_cuda_cpp +
        aten_ufunc_generated_cpu_sources("aten/src/ATen/{}") +
        aten_ufunc_generated_cpu_kernel_sources("aten/src/ATen/{}") +
        aten_ufunc_generated_cuda_sources("aten/src/ATen/{}") + [
            "aten/src/ATen/Declarations.yaml",
        ]
    ),
    generator = "//torchgen:gen",
)
121

122
# Groups the files named by GENERATED_AUTOGRAD_CPP (loaded from :build.bzl),
# with :generate-code attached as a data dependency.
filegroup(
    name = "cpp_generated_code",
    srcs = GENERATED_AUTOGRAD_CPP,
    data = [":generate-code"],
)
127

128
# Make the TBB version-string template addressable from other packages.
exports_files(
    srcs = ["aten/src/ATen/cpu/tbb/extra/version_string.ver.in"],
)
131

132
# ATen
133
# .cpp files directly inside the ATen root and its functorch, detail and
# cpu subdirectories (non-recursive patterns). Listed in the srcs of :aten.
filegroup(
    name = "aten_base_cpp",
    srcs = glob([
        "aten/src/ATen/*.cpp",
        "aten/src/ATen/functorch/*.cpp",
        "aten/src/ATen/detail/*.cpp",
        "aten/src/ATen/cpu/*.cpp",
    ]),
)
142

143
# Every .cpp under aten/src/ATen/core (recursive), except files whose name
# ends in _test.cpp. Listed in the srcs of :aten.
filegroup(
    name = "ATen_CORE_SRCS",
    srcs = glob(
        [
            "aten/src/ATen/core/**/*.cpp",
        ],
        exclude = [
            "aten/src/ATen/core/**/*_test.cpp",
        ],
    ),
)
154

155
# .cpp files directly inside aten/src/ATen/native (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_native_cpp",
    srcs = glob(["aten/src/ATen/native/*.cpp"]),
)
159

160
# .cpp files directly inside aten/src/ATen/native/sparse (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_native_sparse_cpp",
    srcs = glob(["aten/src/ATen/native/sparse/*.cpp"]),
)
164

165
# .cpp files directly inside aten/src/ATen/native/nested (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_native_nested_cpp",
    srcs = glob(["aten/src/ATen/native/nested/*.cpp"]),
)
169

170
# .cpp files directly inside native/quantized and native/quantized/cpu
# (non-recursive patterns). Listed in the srcs of :aten.
filegroup(
    name = "aten_native_quantized_cpp",
    srcs = glob(
        [
            "aten/src/ATen/native/quantized/*.cpp",
            "aten/src/ATen/native/quantized/cpu/*.cpp",
        ],
    ),
)
179

180
# .cpp files directly inside aten/src/ATen/native/transformers
# (non-recursive). Listed in the srcs of :aten.
filegroup(
    name = "aten_native_transformers_cpp",
    srcs = glob(["aten/src/ATen/native/transformers/*.cpp"]),
)
184

185
# .cpp files directly inside ATen/native/mkl and ATen/mkl (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_native_mkl_cpp",
    srcs = glob([
        "aten/src/ATen/native/mkl/*.cpp",
        "aten/src/ATen/mkl/*.cpp",
    ]),
)
192

193
# .cpp files directly inside aten/src/ATen/native/mkldnn (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_native_mkldnn_cpp",
    srcs = glob(["aten/src/ATen/native/mkldnn/*.cpp"]),
)
197

198
# .cpp files directly inside aten/src/ATen/native/xnnpack (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_native_xnnpack",
    srcs = glob(["aten/src/ATen/native/xnnpack/*.cpp"]),
)
202

203
# .cpp files directly inside aten/src/ATen/vulkan (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_base_vulkan",
    srcs = glob(["aten/src/ATen/vulkan/*.cpp"]),
)
207

208
# .cpp files directly inside aten/src/ATen/metal (non-recursive).
# Listed in the srcs of :aten.
filegroup(
    name = "aten_base_metal",
    srcs = glob(["aten/src/ATen/metal/*.cpp"]),
)
212

213
# Every .cpp under aten/src/ATen/quantized (recursive), except files whose
# name ends in _test.cpp. Listed in the srcs of :aten.
filegroup(
    name = "ATen_QUANTIZED_SRCS",
    srcs = glob(
        [
            "aten/src/ATen/quantized/**/*.cpp",
        ],
        exclude = [
            "aten/src/ATen/quantized/**/*_test.cpp",
        ],
    ),
)
224

225
# Host-side (.cpp) sources from the CUDA, cuDNN and MIOpen directories listed
# below; every pattern is non-recursive. Consumed by the srcs of
# cc_library(name = "aten_cuda_cpp").
filegroup(
    name = "aten_cuda_cpp_srcs",
    srcs = glob(
        [
            "aten/src/ATen/cuda/*.cpp",
            "aten/src/ATen/cuda/detail/*.cpp",
            "aten/src/ATen/cuda/tunable/*.cpp",
            "aten/src/ATen/cudnn/*.cpp",
            "aten/src/ATen/native/cuda/*.cpp",
            "aten/src/ATen/native/cuda/linalg/*.cpp",
            "aten/src/ATen/native/cudnn/*.cpp",
            "aten/src/ATen/native/miopen/*.cpp",
            "aten/src/ATen/native/nested/cuda/*.cpp",
            "aten/src/ATen/native/quantized/cuda/*.cpp",
            "aten/src/ATen/native/quantized/cudnn/*.cpp",
            "aten/src/ATen/native/sparse/cuda/*.cpp",
            "aten/src/ATen/native/transformers/cuda/*.cpp",
        ],
    ),
)
245

246
# .cu sources from the directories listed below (non-recursive patterns)
# plus the ufunc-generated CUDA sources. Consumed by the srcs of
# cu_library(name = "aten_cuda").
filegroup(
    name = "aten_cu_srcs",
    srcs = glob([
        "aten/src/ATen/cuda/*.cu",
        "aten/src/ATen/cuda/detail/*.cu",
        "aten/src/ATen/native/cuda/*.cu",
        "aten/src/ATen/native/nested/cuda/*.cu",
        "aten/src/ATen/native/quantized/cuda/*.cu",
        "aten/src/ATen/native/sparse/cuda/*.cu",
        "aten/src/ATen/native/transformers/cuda/*.cu",
    ]) + aten_ufunc_generated_cuda_sources("aten/src/ATen/{}"),
    # It's a bit puzzling to me why it's not necessary to declare the
    # target that generates these sources...
)
260

261
# Template rule mapping Config.h.in to Config.h with the fixed substitution
# values below; each key is an @PLACEHOLDER@ token. The target is listed in
# both :aten_headers (hdrs) and :aten (srcs).
header_template_rule(
    name = "aten_src_ATen_config",
    src = "aten/src/ATen/Config.h.in",
    out = "aten/src/ATen/Config.h",
    include = "aten/src",
    substitutions = {
        "@AT_MKLDNN_ENABLED@": "1",
        "@AT_MKLDNN_ACL_ENABLED@": "0",
        "@AT_MKL_ENABLED@": "1",
        "@AT_MKL_SEQUENTIAL@": "0",
        "@AT_POCKETFFT_ENABLED@": "0",
        "@AT_NNPACK_ENABLED@": "0",
        "@CAFFE2_STATIC_LINK_CUDA_INT@": "0",
        "@AT_BUILD_WITH_BLAS@": "1",
        "@AT_BUILD_WITH_LAPACK@": "1",
        "@AT_PARALLEL_OPENMP@": "0",
        "@AT_PARALLEL_NATIVE@": "1",
        "@AT_PARALLEL_NATIVE_TBB@": "0",
        "@AT_BLAS_F2C@": "0",
        "@AT_BLAS_USE_CBLAS_DOT@": "1",
    },
)
283

284
# Template rule mapping CUDAConfig.h.in to CUDAConfig.h with the fixed
# substitution values below. The target is listed in the hdrs of
# cc_library(name = "aten_cuda_cpp").
header_template_rule(
    name = "aten_src_ATen_cuda_config",
    src = "aten/src/ATen/cuda/CUDAConfig.h.in",
    out = "aten/src/ATen/cuda/CUDAConfig.h",
    include = "aten/src",
    substitutions = {
        "@AT_CUDNN_ENABLED@": "1",
        "@AT_CUSPARSELT_ENABLED@": "0",
        "@AT_ROCM_ENABLED@": "0",
        "@AT_MAGMA_ENABLED@": "0",
        "@NVCC_FLAGS_EXTRA@": "",
    },
)
297

298
# Header-only library for ATen: two torch/csrc headers, all .h/.hpp files
# under aten/src, the .cuh files from the listed CUDA directories, plus the
# Config.h template target and the generated ATen outputs. Adds "aten/src"
# as an include root and depends on //c10.
cc_library(
    name = "aten_headers",
    hdrs = [
        "torch/csrc/Export.h",
        "torch/csrc/jit/frontend/function_schema_parser.h",
    ] + glob(
        [
            "aten/src/**/*.h",
            "aten/src/**/*.hpp",
            "aten/src/ATen/cuda/**/*.cuh",
            "aten/src/ATen/native/**/*.cuh",
            "aten/src/THC/*.cuh",
        ],
    ) + [
        ":aten_src_ATen_config",
        ":generated_aten_cpp",
    ],
    includes = [
        "aten/src",
    ],
    deps = [
        "//c10",
    ],
)
322

323
# Compiler options for the ATen targets in this file (intern_build_aten_ops,
# :aten, :aten_nvrtc, :aten_cuda_cpp and :aten_cuda all pass ATEN_COPTS).
# NOTE(review): "-DCAFFE2_BUILD_MAIN_LIBS" ends in "S", while CAFFE2_COPTS in
# this file passes "-DCAFFE2_BUILD_MAIN_LIB" -- confirm which spelling the
# headers actually test before changing either one.
ATEN_COPTS = COMMON_COPTS + [
    "-DCAFFE2_BUILD_MAIN_LIBS",
    "-DHAVE_AVX_CPU_DEFINITION",
    "-DHAVE_AVX2_CPU_DEFINITION",
    "-fvisibility-inlines-hidden",
    "-fno-math-errno",
    "-fno-trapping-math",
]
331

332
# Invoke the intern_build_aten_ops macro (loaded from //:aten.bzl) with the
# ATen compiler options, the ufunc-generated CPU kernel sources as extra
# implementations, and the dependencies listed below.
intern_build_aten_ops(
    copts = ATEN_COPTS,
    extra_impls = aten_ufunc_generated_cpu_kernel_sources("aten/src/ATen/{}"),
    deps = [
        ":aten_headers",
        "@fbgemm",
        "@mkl",
        "@sleef",
    ],
)
342

343
# Main ATen library: the source filegroups defined above, the Config.h
# template target, the generated CPU sources and the ufunc-generated CPU
# sources. Publicly visible and always linked.
cc_library(
    name = "aten",
    srcs = [
        ":ATen_CORE_SRCS",
        ":ATen_QUANTIZED_SRCS",
        ":aten_base_cpp",
        ":aten_base_metal",
        ":aten_base_vulkan",
        ":aten_native_cpp",
        ":aten_native_mkl_cpp",
        ":aten_native_mkldnn_cpp",
        ":aten_native_nested_cpp",
        ":aten_native_quantized_cpp",
        ":aten_native_sparse_cpp",
        ":aten_native_transformers_cpp",
        ":aten_native_xnnpack",
        ":aten_src_ATen_config",
    ] + generated_cpu_cpp + aten_ufunc_generated_cpu_sources("aten/src/ATen/{}"),
    copts = ATEN_COPTS,
    # Two-argument if_cuda form; presumably the first list applies to CUDA
    # builds and the second otherwise -- see tools/config/defs.bzl to confirm.
    data = if_cuda(
        [":libcaffe2_nvrtc.so"],
        [],
    ),
    visibility = ["//visibility:public"],
    deps = [
        ":ATen_CPU",
        ":aten_headers",
        ":caffe2_for_aten_headers",
        ":torch_headers",
        "@fbgemm",
        "@ideep",
    ],
    alwayslink = True,
)
377

378
# Static, always-linked library built from the .cpp files in
# aten/src/ATen/cuda/nvrtc_stub. It is the only dependency of the
# libcaffe2_nvrtc.so cc_binary in this file.
cc_library(
    name = "aten_nvrtc",
    srcs = glob([
        "aten/src/ATen/cuda/nvrtc_stub/*.cpp",
    ]),
    copts = ATEN_COPTS,
    linkstatic = True,
    visibility = ["//visibility:public"],
    deps = [
        ":aten_headers",
        "//c10",
        "@cuda",
        "@cuda//:cuda_driver",
        "@cuda//:nvrtc",
    ],
    alwayslink = True,
)
395

396
# Shared-object build (linkshared = True) wrapping :aten_nvrtc. Referenced
# from the `data` attribute of cc_library(name = "aten").
cc_binary(
    name = "libcaffe2_nvrtc.so",
    linkshared = True,
    visibility = ["//visibility:public"],
    deps = [
        ":aten_nvrtc",
    ],
)
404

405
# Host-side CUDA portion of ATen: the :aten_cuda_cpp_srcs filegroup plus the
# generated CUDA sources, with CUDAConfig.h exposed as a header.
# Depended on by cu_library(name = "aten_cuda").
cc_library(
    name = "aten_cuda_cpp",
    srcs = [":aten_cuda_cpp_srcs"] + generated_cuda_cpp,
    hdrs = [":aten_src_ATen_cuda_config"],
    copts = ATEN_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":aten",
        "@cuda",
        "@cuda//:cusolver",
        "@cuda//:nvrtc",
        "@cudnn",
        "@cudnn_frontend",
    ],
    alwayslink = True,
)
421

422
# Extra preprocessor defines appended to ATEN_COPTS for the :aten_cuda
# cu_library (half / bfloat16 related macros).
torch_cuda_half_options = [
    "-DCUDA_HAS_FP16=1",
    "-D__CUDA_NO_HALF_OPERATORS__",
    "-D__CUDA_NO_HALF_CONVERSIONS__",
    "-D__CUDA_NO_BFLOAT16_CONVERSIONS__",
    "-D__CUDA_NO_HALF2_OPERATORS__",
]
429

430
# Device-side CUDA portion of ATen: builds the :aten_cu_srcs filegroup with
# ATEN_COPTS plus torch_cuda_half_options. Publicly visible, always linked.
cu_library(
    name = "aten_cuda",
    srcs = [":aten_cu_srcs"],
    copts = ATEN_COPTS + torch_cuda_half_options,
    visibility = ["//visibility:public"],
    deps = [
        ":aten_cuda_cpp",
        "//c10/util:bit_cast",
        "@cuda//:cublas",
        "@cuda//:cufft",
        "@cuda//:cusparse",
        "@cutlass",
    ],
    alwayslink = True,
)
445

446
# caffe2
447
# Compiler options for caffe2: the shared COMMON_COPTS plus caffe2-specific
# defines and the same three -f flags that ATEN_COPTS uses.
CAFFE2_COPTS = COMMON_COPTS + [
    "-Dcaffe2_EXPORTS",
    "-DCAFFE2_USE_GLOO",
    "-DCAFFE2_USE_CUDNN",
    "-DCAFFE2_BUILD_MAIN_LIB",
    "-fvisibility-inlines-hidden",
    "-fno-math-errno",
    "-fno-trapping-math",
]
456

457
# Explicit list of caffe2/contrib sources: the ATen op plus the gloo files.
filegroup(
    name = "caffe2_contrib_srcs",
    srcs = [
        "caffe2/contrib/aten/aten_op.cc",
        "caffe2/contrib/gloo/allgather_ops.cc",
        "caffe2/contrib/gloo/allreduce_ops.cc",
        "caffe2/contrib/gloo/barrier_ops.cc",
        "caffe2/contrib/gloo/broadcast_ops.cc",
        "caffe2/contrib/gloo/common.cc",
        "caffe2/contrib/gloo/common_world_ops.cc",
        "caffe2/contrib/gloo/context.cc",
        "caffe2/contrib/gloo/reduce_scatter_ops.cc",
        "caffe2/contrib/gloo/store_handler.cc",
    ],
)
472

473
# Explicit list of caffe2/core sources.
# NOTE(review): the list includes nomnigraph/tests/test_util.cc and
# test_utils.cc -- confirm these are intended in a non-test filegroup.
filegroup(
    name = "caffe2_core_srcs",
    srcs = [
        "caffe2/core/allocator.cc",
        "caffe2/core/blob_serialization.cc",
        "caffe2/core/blob_stats.cc",
        "caffe2/core/common.cc",
        "caffe2/core/context.cc",
        "caffe2/core/context_base.cc",
        "caffe2/core/db.cc",
        "caffe2/core/event.cc",
        "caffe2/core/export_c10_op_to_caffe2.cc",
        "caffe2/core/graph.cc",
        "caffe2/core/init.cc",
        "caffe2/core/init_denormals.cc",
        "caffe2/core/init_intrinsics_check.cc",
        "caffe2/core/init_omp.cc",
        "caffe2/core/int8_serialization.cc",
        "caffe2/core/memonger.cc",
        "caffe2/core/module.cc",
        "caffe2/core/net.cc",
        "caffe2/core/net_async_base.cc",
        "caffe2/core/net_async_scheduling.cc",
        "caffe2/core/net_async_task.cc",
        "caffe2/core/net_async_task_future.cc",
        "caffe2/core/net_async_task_graph.cc",
        "caffe2/core/net_async_tracing.cc",
        "caffe2/core/net_dag_utils.cc",
        "caffe2/core/net_parallel.cc",
        "caffe2/core/net_simple.cc",
        "caffe2/core/net_simple_refcount.cc",
        "caffe2/core/nomnigraph/Representations/NeuralNet.cc",
        "caffe2/core/nomnigraph/tests/test_util.cc",
        "caffe2/core/numa.cc",
        "caffe2/core/operator.cc",
        "caffe2/core/operator_schema.cc",
        "caffe2/core/plan_executor.cc",
        "caffe2/core/prof_dag_counters.cc",
        "caffe2/core/qtensor.cc",
        "caffe2/core/qtensor_serialization.cc",
        "caffe2/core/stats.cc",
        "caffe2/core/tensor.cc",
        "caffe2/core/tensor_int8.cc",
        "caffe2/core/test_utils.cc",
        "caffe2/core/transform.cc",
        "caffe2/core/types.cc",
        "caffe2/core/workspace.cc",
    ],
)
522

523
# Explicit list of caffe2/distributed sources (store handlers and store ops).
filegroup(
    name = "caffe2_distributed_srcs",
    srcs = [
        "caffe2/distributed/file_store_handler.cc",
        "caffe2/distributed/file_store_handler_op.cc",
        "caffe2/distributed/store_handler.cc",
        "caffe2/distributed/store_ops.cc",
    ],
)
532

533
# Explicit list of caffe2/ideep sources: the operators (including the
# quantization subdirectory) and utils/ideep_register.cc.
filegroup(
    name = "caffe2_ideep_srcs",
    srcs = [
        "caffe2/ideep/operators/adam_op.cc",
        "caffe2/ideep/operators/channel_shuffle_op.cc",
        "caffe2/ideep/operators/concat_split_op.cc",
        "caffe2/ideep/operators/conv_op.cc",
        "caffe2/ideep/operators/conv_transpose_op.cc",
        "caffe2/ideep/operators/dropout_op.cc",
        "caffe2/ideep/operators/elementwise_sum_op.cc",
        "caffe2/ideep/operators/expand_squeeze_dims_op.cc",
        "caffe2/ideep/operators/fully_connected_op.cc",
        "caffe2/ideep/operators/local_response_normalization_op.cc",
        "caffe2/ideep/operators/momentum_sgd_op.cc",
        "caffe2/ideep/operators/operator_fallback_ideep.cc",
        "caffe2/ideep/operators/order_switch_ops.cc",
        "caffe2/ideep/operators/pool_op.cc",
        "caffe2/ideep/operators/quantization/int8_add_op.cc",
        "caffe2/ideep/operators/quantization/int8_conv_op.cc",
        "caffe2/ideep/operators/quantization/int8_dequantize_op.cc",
        "caffe2/ideep/operators/quantization/int8_fully_connected_op.cc",
        "caffe2/ideep/operators/quantization/int8_given_tensor_fill_op.cc",
        "caffe2/ideep/operators/quantization/int8_pool_op.cc",
        "caffe2/ideep/operators/quantization/int8_quantize_op.cc",
        "caffe2/ideep/operators/quantization/int8_relu_op.cc",
        "caffe2/ideep/operators/queue_ops.cc",
        "caffe2/ideep/operators/relu_op.cc",
        "caffe2/ideep/operators/reshape_op.cc",
        "caffe2/ideep/operators/shape_op.cc",
        "caffe2/ideep/operators/sigmoid_op.cc",
        "caffe2/ideep/operators/spatial_batch_norm_op.cc",
        "caffe2/ideep/operators/transpose_op.cc",
        "caffe2/ideep/operators/utility_ops.cc",
        "caffe2/ideep/utils/ideep_register.cc",
    ],
)
569

570
# Explicit list of caffe2/onnx sources.
filegroup(
    name = "caffe2_onnx_srcs",
    srcs = [
        "caffe2/onnx/backend.cc",
        "caffe2/onnx/backend_rep.cc",
        "caffe2/onnx/device.cc",
        "caffe2/onnx/helper.cc",
        "caffe2/onnx/offline_tensor.cc",
        "caffe2/onnx/onnx_exporter.cc",
        "caffe2/onnx/onnxifi_graph_info.cc",
        "caffe2/onnx/onnxifi_init.cc",
    ],
)
583

584
filegroup(
585
    name = "caffe2_operators_srcs",
586
    srcs = [
587
        "caffe2/operators/abs_op.cc",
588
        "caffe2/operators/accumulate_op.cc",
589
        "caffe2/operators/accuracy_op.cc",
590
        "caffe2/operators/acos_op.cc",
591
        "caffe2/operators/affine_channel_op.cc",
592
        "caffe2/operators/alias_with_name.cc",
593
        "caffe2/operators/apmeter_op.cc",
594
        "caffe2/operators/arg_ops.cc",
595
        "caffe2/operators/asin_op.cc",
596
        "caffe2/operators/assert_op.cc",
597
        "caffe2/operators/atan_op.cc",
598
        "caffe2/operators/atomic_ops.cc",
599
        "caffe2/operators/batch_box_cox_op.cc",
600
        "caffe2/operators/batch_bucketize_op.cc",
601
        "caffe2/operators/batch_gather_ops.cc",
602
        "caffe2/operators/batch_matmul_op.cc",
603
        "caffe2/operators/batch_moments_op.cc",
604
        "caffe2/operators/batch_permutation_op.cc",
605
        "caffe2/operators/batch_sparse_to_dense_op.cc",
606
        "caffe2/operators/bbox_transform_op.cc",
607
        "caffe2/operators/bisect_percentile_op.cc",
608
        "caffe2/operators/boolean_mask_ops.cc",
609
        "caffe2/operators/boolean_unmask_ops.cc",
610
        "caffe2/operators/box_with_nms_limit_op.cc",
611
        "caffe2/operators/bucketize_op.cc",
612
        "caffe2/operators/byte_weight_dequant_op.cc",
613
        "caffe2/operators/cast_op.cc",
614
        "caffe2/operators/cbrt_op.cc",
615
        "caffe2/operators/cc_bmm_bg_op.cc",
616
        "caffe2/operators/ceil_op.cc",
617
        "caffe2/operators/channel_backprop_stats_op.cc",
618
        "caffe2/operators/channel_shuffle_op.cc",
619
        "caffe2/operators/channel_stats_op.cc",
620
        "caffe2/operators/clip_op.cc",
621
        "caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.cc",
622
        "caffe2/operators/communicator_op.cc",
623
        "caffe2/operators/concat_split_op.cc",
624
        "caffe2/operators/conditional_op.cc",
625
        "caffe2/operators/conv_gradient_op.cc",
626
        "caffe2/operators/conv_op.cc",
627
        "caffe2/operators/conv_op_eigen.cc",
628
        "caffe2/operators/conv_op_shared.cc",
629
        "caffe2/operators/conv_transpose_gradient_op.cc",
630
        "caffe2/operators/conv_transpose_op.cc",
631
        "caffe2/operators/conv_transpose_op_mobile.cc",
632
        "caffe2/operators/copy_op.cc",
633
        "caffe2/operators/copy_rows_to_tensor_op.cc",
634
        "caffe2/operators/cos_op.cc",
635
        "caffe2/operators/cosh_op.cc",
636
        "caffe2/operators/cosine_embedding_criterion_op.cc",
637
        "caffe2/operators/counter_ops.cc",
638
        "caffe2/operators/crash_op.cc",
639
        "caffe2/operators/create_scope_op.cc",
640
        "caffe2/operators/crf_viterbi_op.cc",
641
        "caffe2/operators/cross_entropy_op.cc",
642
        "caffe2/operators/ctc_beam_search_decoder_op.cc",
643
        "caffe2/operators/ctc_greedy_decoder_op.cc",
644
        "caffe2/operators/cube_op.cc",
645
        "caffe2/operators/data_couple.cc",
646
        "caffe2/operators/dataset_ops.cc",
647
        "caffe2/operators/deform_conv_gradient_op.cc",
648
        "caffe2/operators/deform_conv_op.cc",
649
        "caffe2/operators/dense_vector_to_id_list_op.cc",
650
        "caffe2/operators/distance_op.cc",
651
        "caffe2/operators/do_op.cc",
652
        "caffe2/operators/dropout_op.cc",
653
        "caffe2/operators/elementwise_add_gradient_op.cc",
654
        "caffe2/operators/elementwise_add_op.cc",
655
        "caffe2/operators/elementwise_div_gradient_op.cc",
656
        "caffe2/operators/elementwise_div_op.cc",
657
        "caffe2/operators/elementwise_linear_op.cc",
658
        "caffe2/operators/elementwise_logical_ops.cc",
659
        "caffe2/operators/elementwise_mul_gradient_op.cc",
660
        "caffe2/operators/elementwise_mul_op.cc",
661
        "caffe2/operators/elementwise_ops.cc",
662
        "caffe2/operators/elementwise_ops_schema.cc",
663
        "caffe2/operators/elementwise_ops_utils.cc",
664
        "caffe2/operators/elementwise_sub_gradient_op.cc",
665
        "caffe2/operators/elementwise_sub_op.cc",
666
        "caffe2/operators/elementwise_sum_op.cc",
667
        "caffe2/operators/elu_op.cc",
668
        "caffe2/operators/enforce_finite_op.cc",
669
        "caffe2/operators/ensure_clipped_op.cc",
670
        "caffe2/operators/ensure_cpu_output_op.cc",
671
        "caffe2/operators/erf_op.cc",
672
        "caffe2/operators/exp_op.cc",
673
        "caffe2/operators/expand_op.cc",
674
        "caffe2/operators/expand_squeeze_dims_op.cc",
675
        "caffe2/operators/fc_inference.cc",
676
        "caffe2/operators/feature_maps_ops.cc",
677
        "caffe2/operators/feed_blob_op.cc",
678
        "caffe2/operators/filler_op.cc",
679
        "caffe2/operators/find_duplicate_elements_op.cc",
680
        "caffe2/operators/find_op.cc",
681
        "caffe2/operators/flatten_op.cc",
682
        "caffe2/operators/flexible_top_k.cc",
683
        "caffe2/operators/floor_op.cc",
684
        "caffe2/operators/free_op.cc",
685
        "caffe2/operators/fully_connected_op.cc",
686
        "caffe2/operators/fused_rowwise_8bit_conversion_ops.cc",
687
        "caffe2/operators/fused_rowwise_random_quantization_ops.cc",
688
        "caffe2/operators/gather_fused_8bit_rowwise_op.cc",
689
        "caffe2/operators/gather_op.cc",
690
        "caffe2/operators/gather_ranges_to_dense_op.cc",
691
        "caffe2/operators/gelu_op.cc",
692
        "caffe2/operators/generate_proposals_op.cc",
693
        "caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cc",
694
        "caffe2/operators/given_tensor_fill_op.cc",
695
        "caffe2/operators/glu_op.cc",
696
        "caffe2/operators/group_norm_op.cc",
697
        "caffe2/operators/gru_unit_op.cc",
698
        "caffe2/operators/h_softmax_op.cc",
699
        "caffe2/operators/half_float_ops.cc",
700
        "caffe2/operators/hard_sigmoid_op.cc",
701
        "caffe2/operators/heatmap_max_keypoint_op.cc",
702
        "caffe2/operators/if_op.cc",
703
        "caffe2/operators/im2col_op.cc",
704
        "caffe2/operators/index_hash_ops.cc",
705
        "caffe2/operators/index_ops.cc",
706
        "caffe2/operators/inference_lstm_op.cc",
707
        "caffe2/operators/instance_norm_gradient_op.cc",
708
        "caffe2/operators/instance_norm_op.cc",
709
        "caffe2/operators/integral_image_op.cc",
710
        "caffe2/operators/is_empty_op.cc",
711
        "caffe2/operators/jsd_op.cc",
712
        "caffe2/operators/key_split_ops.cc",
713
        "caffe2/operators/last_n_window_collector.cc",
714
        "caffe2/operators/layer_norm_op.cc",
715
        "caffe2/operators/leaky_relu_op.cc",
716
        "caffe2/operators/length_split_op.cc",
717
        "caffe2/operators/lengths_pad_op.cc",
718
        "caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.cc",
719
        "caffe2/operators/lengths_reducer_ops.cc",
720
        "caffe2/operators/lengths_reducer_rowwise_8bit_ops.cc",
721
        "caffe2/operators/lengths_tile_op.cc",
722
        "caffe2/operators/lengths_top_k_op.cc",
723
        "caffe2/operators/listwise_l2r_op.cc",
724
        "caffe2/operators/load_save_op.cc",
725
        "caffe2/operators/load_save_op_util.cc",
726
        "caffe2/operators/local_response_normalization_op.cc",
727
        "caffe2/operators/locally_connected_op.cc",
728
        "caffe2/operators/locally_connected_op_util.cc",
729
        "caffe2/operators/log_op.cc",
730
        "caffe2/operators/logit_op.cc",
731
        "caffe2/operators/loss_op.cc",
732
        "caffe2/operators/lp_pool_op.cc",
733
        "caffe2/operators/lpnorm_op.cc",
734
        "caffe2/operators/lstm_unit_op.cc",
735
        "caffe2/operators/map_ops.cc",
736
        "caffe2/operators/margin_ranking_criterion_op.cc",
737
        "caffe2/operators/matmul_op.cc",
738
        "caffe2/operators/mean_op.cc",
739
        "caffe2/operators/merge_id_lists_op.cc",
740
        "caffe2/operators/minmax_gradient_ops.cc",
741
        "caffe2/operators/minmax_ops.cc",
742
        "caffe2/operators/mod_op.cc",
743
        "caffe2/operators/moments_op.cc",
744
        "caffe2/operators/multi_class_accuracy_op.cc",
745
        "caffe2/operators/negate_gradient_op.cc",
746
        "caffe2/operators/negative_op.cc",
747
        "caffe2/operators/ngram_ops.cc",
748
        "caffe2/operators/norm_planar_yuv_op.cc",
749
        "caffe2/operators/normalize_l1_op.cc",
750
        "caffe2/operators/normalize_op.cc",
751
        "caffe2/operators/numpy_tile_op.cc",
752
        "caffe2/operators/one_hot_ops.cc",
753
        "caffe2/operators/onnx_while_op.cc",
754
        "caffe2/operators/order_switch_ops.cc",
755
        "caffe2/operators/pack_rnn_sequence_op.cc",
756
        "caffe2/operators/pack_segments.cc",
757
        "caffe2/operators/pad_op.cc",
758
        "caffe2/operators/partition_ops.cc",
759
        "caffe2/operators/percentile_op.cc",
760
        "caffe2/operators/perplexity_op.cc",
761
        "caffe2/operators/piecewise_linear_transform_op.cc",
762
        "caffe2/operators/pool_gradient_op.cc",
763
        "caffe2/operators/pool_op.cc",
764
        "caffe2/operators/pool_op_util.cc",
765
        "caffe2/operators/pow_op.cc",
766
        "caffe2/operators/prelu_op.cc",
767
        "caffe2/operators/prepend_dim_op.cc",
768
        "caffe2/operators/quant_decode_op.cc",
769
        "caffe2/operators/rank_loss_op.cc",
770
        "caffe2/operators/reciprocal_gradient_op.cc",
771
        "caffe2/operators/reciprocal_op.cc",
772
        "caffe2/operators/reduce_front_back_max_ops.cc",
773
        "caffe2/operators/reduce_front_back_mean_ops.cc",
774
        "caffe2/operators/reduce_front_back_sum_ops.cc",
775
        "caffe2/operators/reduce_ops.cc",
776
        "caffe2/operators/reduction_ops.cc",
777
        "caffe2/operators/relu_n_op.cc",
778
        "caffe2/operators/relu_op.cc",
779
        "caffe2/operators/remove_data_blocks_op.cc",
780
        "caffe2/operators/replace_nan_op.cc",
781
        "caffe2/operators/reservoir_sampling.cc",
782
        "caffe2/operators/reshape_op.cc",
783
        "caffe2/operators/resize_3d_op.cc",
784
        "caffe2/operators/resize_op.cc",
785
        "caffe2/operators/reverse_packed_segs_op.cc",
786
        "caffe2/operators/rmac_regions_op.cc",
787
        "caffe2/operators/rnn/recurrent_network_blob_fetcher_op.cc",
788
        "caffe2/operators/rnn/recurrent_network_executor.cc",
789
        "caffe2/operators/rnn/recurrent_network_op.cc",
790
        "caffe2/operators/roi_align_gradient_op.cc",
791
        "caffe2/operators/roi_align_op.cc",
792
        "caffe2/operators/roi_align_rotated_gradient_op.cc",
793
        "caffe2/operators/roi_align_rotated_op.cc",
794
        "caffe2/operators/roi_pool_op.cc",
795
        "caffe2/operators/rowmul_op.cc",
796
        "caffe2/operators/rsqrt_op.cc",
797
        "caffe2/operators/scale_blobs_op.cc",
798
        "caffe2/operators/scale_op.cc",
799
        "caffe2/operators/segment_reduction_op.cc",
800
        "caffe2/operators/selu_op.cc",
801
        "caffe2/operators/sequence_ops.cc",
802
        "caffe2/operators/shape_op.cc",
803
        "caffe2/operators/sigmoid_gradient_op.cc",
804
        "caffe2/operators/sigmoid_op.cc",
805
        "caffe2/operators/sin_op.cc",
806
        "caffe2/operators/sinh_op.cc",
807
        "caffe2/operators/sinusoid_position_encoding_op.cc",
808
        "caffe2/operators/slice_op.cc",
809
        "caffe2/operators/softmax_op.cc",
810
        "caffe2/operators/softmax_utils.cc",
811
        "caffe2/operators/softmax_with_loss_op.cc",
812
        "caffe2/operators/softplus_op.cc",
813
        "caffe2/operators/softsign_op.cc",
814
        "caffe2/operators/space_batch_op.cc",
815
        "caffe2/operators/sparse_dropout_with_replacement_op.cc",
816
        "caffe2/operators/sparse_normalize_op.cc",
817
        "caffe2/operators/sparse_to_dense_mask_op.cc",
818
        "caffe2/operators/sparse_to_dense_op.cc",
819
        "caffe2/operators/spatial_batch_norm_gradient_op.cc",
820
        "caffe2/operators/spatial_batch_norm_op.cc",
821
        "caffe2/operators/spatial_softmax_with_loss_op.cc",
822
        "caffe2/operators/sqr_op.cc",
823
        "caffe2/operators/sqrt_op.cc",
824
        "caffe2/operators/square_root_divide_op.cc",
825
        "caffe2/operators/stats_ops.cc",
826
        "caffe2/operators/stats_put_ops.cc",
827
        "caffe2/operators/stop_gradient.cc",
828
        "caffe2/operators/string_ops.cc",
829
        "caffe2/operators/stump_func_op.cc",
830
        "caffe2/operators/stylizer_ops.cc",
831
        "caffe2/operators/summarize_op.cc",
832
        "caffe2/operators/swish_op.cc",
833
        "caffe2/operators/tan_op.cc",
834
        "caffe2/operators/tanh_gradient_op.cc",
835
        "caffe2/operators/tanh_op.cc",
836
        "caffe2/operators/tensor_protos_db_input.cc",
837
        "caffe2/operators/text_file_reader.cc",
838
        "caffe2/operators/text_file_reader_utils.cc",
839
        "caffe2/operators/thresholded_relu_op.cc",
840
        "caffe2/operators/tile_op.cc",
841
        "caffe2/operators/top_k.cc",
842
        "caffe2/operators/transpose_op.cc",
843
        "caffe2/operators/tt_linear_op.cc",
844
        "caffe2/operators/unique_ops.cc",
845
        "caffe2/operators/upsample_op.cc",
846
        "caffe2/operators/utility_ops.cc",
847
        "caffe2/operators/variable_length_sequence_padding.cc",
848
        "caffe2/operators/weighted_multi_sampling_op.cc",
849
        "caffe2/operators/weighted_sample_op.cc",
850
        "caffe2/operators/while_op.cc",
851
        "caffe2/operators/workspace_ops.cc",
852
        "caffe2/operators/zero_gradient_op.cc",
853
    ],
854
)
855

856
# Explicit list of the C++ sources under caffe2/opt/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_opt_srcs",
    srcs = [
        "caffe2/opt/annotations.cc",
        "caffe2/opt/backend_cutting.cc",
        "caffe2/opt/backend_transformer_base.cc",
        "caffe2/opt/bound_shape_inferencer.cc",
        "caffe2/opt/converter.cc",
        "caffe2/opt/dead_code_elim.cc",
        "caffe2/opt/device.cc",
        "caffe2/opt/distributed.cc",
        "caffe2/opt/distributed_converter.cc",
        "caffe2/opt/fusion.cc",
        "caffe2/opt/mobile.cc",
        "caffe2/opt/onnxifi_op.cc",
        "caffe2/opt/onnxifi_transformer.cc",
        "caffe2/opt/optimize_ideep.cc",
        "caffe2/opt/optimizer.cc",
        "caffe2/opt/passes.cc",
        "caffe2/opt/shape_info.cc",
        "caffe2/opt/tvm_transformer.cc",
    ],
)
879

880
# Explicit list of the baseline C++ sources under caffe2/perfkernels/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_perfkernels_srcs",
    srcs = [
        "caffe2/perfkernels/adagrad.cc",
        "caffe2/perfkernels/embedding_lookup.cc",
        "caffe2/perfkernels/embedding_lookup_idx.cc",
        "caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc",
        "caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup_idx.cc",
        "caffe2/perfkernels/fused_nbit_rowwise_conversion.cc",
        "caffe2/perfkernels/lstm_unit_cpu_common.cc",
        "caffe2/perfkernels/math_cpu_base.cc",
        "caffe2/perfkernels/typed_axpy.cc",
    ],
)
894

895
# Explicit list of the files under caffe2/predictor/ (note: this list also
# carries one header, emulator/data_filler.h, next to the .cc files).
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_predictor_srcs",
    srcs = [
        "caffe2/predictor/emulator/data_filler.cc",
        "caffe2/predictor/emulator/data_filler.h",
        "caffe2/predictor/predictor.cc",
        "caffe2/predictor/predictor_config.cc",
        "caffe2/predictor/predictor_utils.cc",
    ],
)
905

906
# Explicit list of the C++ sources under caffe2/quantization/server/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_quantization_srcs",
    srcs = [
        "caffe2/quantization/server/activation_distribution_observer.cc",
        "caffe2/quantization/server/batch_matmul_dnnlowp_op.cc",
        "caffe2/quantization/server/caffe2_dnnlowp_utils.cc",
        "caffe2/quantization/server/channel_shuffle_dnnlowp_op.cc",
        "caffe2/quantization/server/concat_dnnlowp_op.cc",
        "caffe2/quantization/server/conv_dnnlowp_acc16_op.cc",
        "caffe2/quantization/server/conv_dnnlowp_op.cc",
        "caffe2/quantization/server/conv_relu_op.cc",
        "caffe2/quantization/server/dequantize_dnnlowp_op.cc",
        "caffe2/quantization/server/dnnlowp.cc",
        "caffe2/quantization/server/dnnlowp_partition.cc",
        "caffe2/quantization/server/dynamic_histogram.cc",
        "caffe2/quantization/server/elementwise_add_dnnlowp_op.cc",
        "caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc",
        "caffe2/quantization/server/elementwise_mul_dnnlowp_op.cc",
        "caffe2/quantization/server/elementwise_sum_dnnlowp_op.cc",
        "caffe2/quantization/server/elementwise_sum_relu_op.cc",
        "caffe2/quantization/server/fbgemm_pack_matrix_cache.cc",
        "caffe2/quantization/server/fbgemm_pack_op.cc",
        "caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc",
        "caffe2/quantization/server/fully_connected_dnnlowp_op.cc",
        "caffe2/quantization/server/fully_connected_fake_lowp_op.cc",
        "caffe2/quantization/server/group_norm_dnnlowp_op.cc",
        "caffe2/quantization/server/int8_gen_quant_params.cc",
        "caffe2/quantization/server/kl_minimization.cc",
        "caffe2/quantization/server/lstm_unit_dnnlowp_op.cc",
        "caffe2/quantization/server/norm_minimization.cc",
        "caffe2/quantization/server/p99.cc",
        "caffe2/quantization/server/pool_dnnlowp_op.cc",
        "caffe2/quantization/server/quantize_dnnlowp_op.cc",
        "caffe2/quantization/server/relu_dnnlowp_op.cc",
        "caffe2/quantization/server/sigmoid.cc",
        "caffe2/quantization/server/sigmoid_dnnlowp_op.cc",
        "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.cc",
        "caffe2/quantization/server/tanh.cc",
        "caffe2/quantization/server/tanh_dnnlowp_op.cc",
        "caffe2/quantization/server/utility_dnnlowp_ops.cc",
    ],
)
948

949
# Explicit list of the C++ sources under caffe2/queue/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_queue_srcs",
    srcs = [
        "caffe2/queue/blobs_queue.cc",
        "caffe2/queue/blobs_queue_db.cc",
        "caffe2/queue/queue_ops.cc",
        "caffe2/queue/rebatching_queue.cc",
        "caffe2/queue/rebatching_queue_ops.cc",
    ],
)
959

960
# Explicit list of the C++ sources under caffe2/serialize/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_serialize_srcs",
    srcs = [
        "caffe2/serialize/file_adapter.cc",
        "caffe2/serialize/inline_container.cc",
        "caffe2/serialize/istream_adapter.cc",
        "caffe2/serialize/read_adapter_interface.cc",
    ],
)
969

970
# Explicit list of the CPU C++ sources under caffe2/sgd/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_sgd_srcs",
    srcs = [
        "caffe2/sgd/adadelta_op.cc",
        "caffe2/sgd/adagrad_op.cc",
        "caffe2/sgd/adam_op.cc",
        "caffe2/sgd/clip_tensor_op.cc",
        "caffe2/sgd/ftrl_op.cc",
        "caffe2/sgd/gftrl_op.cc",
        "caffe2/sgd/iter_op.cc",
        "caffe2/sgd/lars_op.cc",
        "caffe2/sgd/learning_rate_adaption_op.cc",
        "caffe2/sgd/learning_rate_op.cc",
        "caffe2/sgd/momentum_sgd_op.cc",
        "caffe2/sgd/rmsprop_op.cc",
        "caffe2/sgd/wngrad_op.cc",
        "caffe2/sgd/yellowfin_op.cc",
    ],
)
989

990
# Explicit list of the C++ sources under caffe2/transforms/.
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_transforms_srcs",
    srcs = [
        "caffe2/transforms/common_subexpression_elimination.cc",
        "caffe2/transforms/conv_to_nnpack_transform.cc",
        "caffe2/transforms/pattern_net_transform.cc",
        "caffe2/transforms/single_op_transform.cc",
    ],
)
999

1000
# Explicit list of the CPU C++ sources under caffe2/utils/ (including the
# math/ and threadpool/ subdirectories).
# Referenced from the `srcs` of the `:caffe2` library.
filegroup(
    name = "caffe2_utils_srcs",
    srcs = [
        "caffe2/utils/bench_utils.cc",
        "caffe2/utils/cpuid.cc",
        "caffe2/utils/math/broadcast.cc",
        "caffe2/utils/math/elementwise.cc",
        "caffe2/utils/math/reduce.cc",
        "caffe2/utils/math/transpose.cc",
        "caffe2/utils/math/utils.cc",
        "caffe2/utils/math_cpu.cc",
        "caffe2/utils/murmur_hash3.cc",
        "caffe2/utils/proto_utils.cc",
        "caffe2/utils/proto_wrap.cc",
        "caffe2/utils/signal_handler.cc",
        "caffe2/utils/smart_tensor_printer.cc",
        "caffe2/utils/string_utils.cc",
        "caffe2/utils/threadpool/ThreadPool.cc",
        "caffe2/utils/threadpool/pthreadpool.cc",
        "caffe2/utils/threadpool/pthreadpool_impl.cc",
        "caffe2/utils/threadpool/thread_pool_guard.cpp",
    ],
)
1023

1024
# Explicit list of the `*_gpu.cc` / `*_cudnn.cc` host-side C++ sources.
# This filegroup is the sole `srcs` entry of the `:caffe2_cuda_cpp` library.
filegroup(
    name = "caffe2_cuda_cpp_srcs",
    srcs = [
        "caffe2/contrib/aten/aten_op_gpu.cc",
        "caffe2/contrib/gloo/allreduce_ops_gpu.cc",
        "caffe2/contrib/gloo/broadcast_ops_gpu.cc",
        "caffe2/contrib/gloo/common_world_ops_gpu.cc",
        "caffe2/core/blob_serialization_gpu.cc",
        "caffe2/core/common_cudnn.cc",
        "caffe2/core/common_gpu.cc",
        "caffe2/core/event_gpu.cc",
        "caffe2/db/create_db_op_gpu.cc",
        "caffe2/distributed/file_store_handler_op_gpu.cc",
        "caffe2/operators/communicator_op_gpu.cc",
        "caffe2/operators/concat_split_op_gpu.cc",
        "caffe2/operators/conv_op_cache_cudnn.cc",
        "caffe2/operators/conv_op_cudnn.cc",
        "caffe2/operators/conv_op_gpu.cc",
        "caffe2/operators/conv_op_shared_gpu.cc",
        "caffe2/operators/conv_transpose_op_cudnn.cc",
        "caffe2/operators/conv_transpose_op_gpu.cc",
        "caffe2/operators/counter_ops_gpu.cc",
        "caffe2/operators/do_op_gpu.cc",
        "caffe2/operators/dropout_op_cudnn.cc",
        "caffe2/operators/elementwise_add_op_gpu.cc",
        "caffe2/operators/elementwise_sub_op_gpu.cc",
        "caffe2/operators/elu_op_cudnn.cc",
        "caffe2/operators/exp_op_gpu.cc",
        "caffe2/operators/expand_op_gpu.cc",
        "caffe2/operators/expand_squeeze_dims_op_gpu.cc",
        "caffe2/operators/free_op_gpu.cc",
        "caffe2/operators/fully_connected_op_gpu.cc",
        "caffe2/operators/if_op_gpu.cc",
        "caffe2/operators/im2col_op_gpu.cc",
        "caffe2/operators/load_save_op_gpu.cc",
        "caffe2/operators/local_response_normalization_op_cudnn.cc",
        "caffe2/operators/locally_connected_op_gpu.cc",
        "caffe2/operators/log_op_gpu.cc",
        "caffe2/operators/matmul_op_gpu.cc",
        "caffe2/operators/negate_gradient_op_gpu.cc",
        "caffe2/operators/negative_op_gpu.cc",
        "caffe2/operators/order_switch_ops_cudnn.cc",
        "caffe2/operators/order_switch_ops_gpu.cc",
        "caffe2/operators/pool_op_cudnn.cc",
        "caffe2/operators/prepend_dim_op_gpu.cc",
        "caffe2/operators/reshape_op_gpu.cc",
        "caffe2/operators/rnn/recurrent_network_blob_fetcher_op_gpu.cc",
        "caffe2/operators/rnn/recurrent_network_executor_gpu.cc",
        "caffe2/operators/rnn/recurrent_op_cudnn.cc",
        "caffe2/operators/scale_op_gpu.cc",
        "caffe2/operators/shape_op_gpu.cc",
        "caffe2/operators/sigmoid_op_cudnn.cc",
        "caffe2/operators/softmax_op_cudnn.cc",
        "caffe2/operators/sqr_op_gpu.cc",
        "caffe2/operators/sqrt_op_gpu.cc",
        "caffe2/operators/stop_gradient_gpu.cc",
        "caffe2/operators/tanh_op_cudnn.cc",
        "caffe2/operators/tensor_protos_db_input_gpu.cc",
        "caffe2/operators/transpose_op_cudnn.cc",
        "caffe2/operators/while_op_gpu.cc",
        "caffe2/operators/zero_gradient_op_gpu.cc",
        "caffe2/queue/queue_ops_gpu.cc",
        "caffe2/sgd/iter_op_gpu.cc",
        "caffe2/sgd/learning_rate_op_gpu.cc",
    ],
)
1090

1091
# Explicit list of the caffe2 `.cu` sources.
# Referenced from the `srcs` of the `:caffe2_cuda` cu_library.
filegroup(
    name = "caffe2_cu_srcs",
    srcs = [
        "caffe2/core/context_gpu.cu",
        "caffe2/operators/abs_op.cu",
        "caffe2/operators/accumulate_op.cu",
        "caffe2/operators/accuracy_op.cu",
        "caffe2/operators/acos_op.cu",
        "caffe2/operators/affine_channel_op.cu",
        "caffe2/operators/alias_with_name.cu",
        "caffe2/operators/arg_ops.cu",
        "caffe2/operators/asin_op.cu",
        "caffe2/operators/assert_op.cu",
        "caffe2/operators/atan_op.cu",
        "caffe2/operators/batch_gather_ops.cu",
        "caffe2/operators/batch_matmul_op.cu",
        "caffe2/operators/batch_moments_op.cu",
        "caffe2/operators/batch_permutation_op.cu",
        "caffe2/operators/batch_sparse_to_dense_op.cu",
        "caffe2/operators/boolean_mask_ops.cu",
        "caffe2/operators/boolean_unmask_ops.cu",
        "caffe2/operators/bucketize_op.cu",
        "caffe2/operators/cast_op.cu",
        "caffe2/operators/cbrt_op.cu",
        "caffe2/operators/ceil_op.cu",
        "caffe2/operators/channel_backprop_stats_op.cu",
        "caffe2/operators/channel_shuffle_op.cu",
        "caffe2/operators/channel_stats_op.cu",
        "caffe2/operators/channelwise_conv3d_op_cudnn.cu",
        "caffe2/operators/clip_op.cu",
        "caffe2/operators/copy_op.cu",
        "caffe2/operators/cos_op.cu",
        "caffe2/operators/cosh_op.cu",
        "caffe2/operators/cosine_embedding_criterion_op.cu",
        "caffe2/operators/cross_entropy_op.cu",
        "caffe2/operators/cube_op.cu",
        "caffe2/operators/data_couple_gpu.cu",
        "caffe2/operators/deform_conv_op.cu",
        "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
        "caffe2/operators/distance_op.cu",
        "caffe2/operators/dropout_op.cu",
        "caffe2/operators/elementwise_div_op.cu",
        "caffe2/operators/elementwise_linear_op.cu",
        "caffe2/operators/elementwise_mul_op.cu",
        "caffe2/operators/elementwise_ops.cu",
        "caffe2/operators/elu_op.cu",
        "caffe2/operators/enforce_finite_op.cu",
        "caffe2/operators/ensure_cpu_output_op.cu",
        "caffe2/operators/erf_op.cu",
        "caffe2/operators/filler_op.cu",
        "caffe2/operators/find_op.cu",
        "caffe2/operators/floor_op.cu",
        "caffe2/operators/gather_op.cu",
        "caffe2/operators/gelu_op.cu",
        "caffe2/operators/generate_proposals_op.cu",
        "caffe2/operators/generate_proposals_op_util_nms_gpu.cu",
        "caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu",
        "caffe2/operators/given_tensor_fill_op.cu",
        "caffe2/operators/glu_op.cu",
        "caffe2/operators/group_norm_op.cu",
        "caffe2/operators/gru_unit_op_gpu.cu",
        "caffe2/operators/half_float_ops.cu",
        "caffe2/operators/hard_sigmoid_op.cu",
        "caffe2/operators/instance_norm_op.cu",
        "caffe2/operators/integral_image_op.cu",
        "caffe2/operators/layer_norm_op.cu",
        "caffe2/operators/leaky_relu_op.cu",
        "caffe2/operators/lengths_pad_op.cu",
        "caffe2/operators/lengths_tile_op.cu",
        "caffe2/operators/local_response_normalization_op.cu",
        "caffe2/operators/logit_op.cu",
        "caffe2/operators/loss_op.cu",
        "caffe2/operators/lp_pool_op.cu",
        "caffe2/operators/lstm_unit_op_gpu.cu",
        "caffe2/operators/margin_ranking_criterion_op.cu",
        "caffe2/operators/max_pool_with_index.cu",
        "caffe2/operators/mean_op.cu",
        "caffe2/operators/mem_query_op.cu",
        "caffe2/operators/minmax_ops.cu",
        "caffe2/operators/moments_op.cu",
        "caffe2/operators/multi_class_accuracy_op.cu",
        "caffe2/operators/normalize_ops.cu",
        "caffe2/operators/one_hot_ops.cu",
        "caffe2/operators/pack_segments.cu",
        "caffe2/operators/pad_op_gpu.cu",
        "caffe2/operators/perplexity_op.cu",
        "caffe2/operators/piecewise_linear_transform_op.cu",
        "caffe2/operators/pool_op.cu",
        "caffe2/operators/pow_op.cu",
        "caffe2/operators/prelu_op.cu",
        "caffe2/operators/reciprocal_op.cu",
        "caffe2/operators/reduce_front_back_max_ops.cu",
        "caffe2/operators/reduce_front_back_sum_mean_ops.cu",
        "caffe2/operators/reduce_ops.cu",
        "caffe2/operators/reduction_ops.cu",
        "caffe2/operators/relu_n_op.cu",
        "caffe2/operators/relu_op.cu",
        "caffe2/operators/replace_nan_op.cu",
        "caffe2/operators/resize_3d_op.cu",
        "caffe2/operators/resize_op.cu",
        "caffe2/operators/reverse_packed_segs_op.cu",
        "caffe2/operators/rmac_regions_op.cu",
        "caffe2/operators/rnn/recurrent_network_op_gpu.cu",
        "caffe2/operators/roi_align_gradient_op.cu",
        "caffe2/operators/roi_align_op.cu",
        "caffe2/operators/roi_align_rotated_gradient_op.cu",
        "caffe2/operators/roi_align_rotated_op.cu",
        "caffe2/operators/roi_pool_op.cu",
        "caffe2/operators/rsqrt_op.cu",
        "caffe2/operators/scale_blobs_op.cu",
        "caffe2/operators/segment_reduction_op_gpu.cu",
        "caffe2/operators/selu_op.cu",
        "caffe2/operators/sequence_ops.cu",
        "caffe2/operators/sigmoid_op.cu",
        "caffe2/operators/sin_op.cu",
        "caffe2/operators/sinh_op.cu",
        "caffe2/operators/slice_op.cu",
        "caffe2/operators/softmax_ops.cu",
        "caffe2/operators/softplus_op.cu",
        "caffe2/operators/softsign_op.cu",
        "caffe2/operators/space_batch_op_gpu.cu",
        "caffe2/operators/sparse_normalize_op_gpu.cu",
        "caffe2/operators/sparse_to_dense_op.cu",
        "caffe2/operators/spatial_batch_norm_op.cu",
        "caffe2/operators/spatial_batch_norm_op_cudnn.cu",
        "caffe2/operators/stump_func_op.cu",
        "caffe2/operators/summarize_op.cu",
        "caffe2/operators/swish_op.cu",
        "caffe2/operators/tan_op.cu",
        "caffe2/operators/tanh_op.cu",
        "caffe2/operators/thresholded_relu_op.cu",
        "caffe2/operators/tile_op.cu",
        "caffe2/operators/top_k.cu",
        "caffe2/operators/transpose_op.cu",
        "caffe2/operators/unique_ops.cu",
        "caffe2/operators/upsample_op.cu",
        "caffe2/operators/utility_ops.cu",
        "caffe2/operators/weighted_sample_op.cu",
        "caffe2/sgd/adadelta_op_gpu.cu",
        "caffe2/sgd/adagrad_op_gpu.cu",
        "caffe2/sgd/adam_op_gpu.cu",
        "caffe2/sgd/fp16_momentum_sgd_op.cu",
        "caffe2/sgd/fp32_momentum_sgd_op.cu",
        "caffe2/sgd/lars_op_gpu.cu",
        "caffe2/sgd/momentum_sgd_op_gpu.cu",
        "caffe2/sgd/rmsprop_op_gpu.cu",
        "caffe2/sgd/yellowfin_op_gpu.cu",
        "caffe2/utils/math/broadcast.cu",
        "caffe2/utils/math/elementwise.cu",
        "caffe2/utils/math/reduce.cu",
        "caffe2/utils/math/transpose.cu",
        "caffe2/utils/math_gpu.cu",
    ],
)
1245

1246
# To achieve finer granularity and make debugging easier, caffe2 is split into
# three libraries: ATen, caffe2 and caffe2_for_aten_headers. The ATen lib groups
# the source code under the aten/ directory, and caffe2 contains most files under
# the `caffe2/` directory. Since the ATen lib and the caffe2 lib would depend on
# each other, `caffe2_for_aten_headers` is split out from `caffe2` to avoid a
# dependency cycle.
cc_library(
    name = "caffe2_for_aten_headers",
    hdrs = [
        "caffe2/core/common.h",
        "caffe2/core/logging.h",
        "caffe2/core/types.h",
        "caffe2/perfkernels/common.h",
        "caffe2/perfkernels/embedding_lookup.h",
        "caffe2/perfkernels/embedding_lookup_idx.h",
        "caffe2/utils/cpuid.h",
        "caffe2/utils/fixed_divisor.h",
    ] + glob([
        "caffe2/utils/threadpool/*.h",
    ]),
    copts = CAFFE2_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_core_macros",
        "//c10",
        "//caffe2/proto:caffe2_pb",
    ],
)
1273

1274
# Python code generator script; used as the tool of the
# `:generated_caffe2_aten_op_headers` genrule.
py_binary(
    name = "gen_op",
    srcs = ["caffe2/contrib/aten/gen_op.py"],
    deps = ["//torchgen"],
)
1279

1280
# Runs `:gen_op` to produce gen_aten_op.h from the ATen op template header and
# Declarations.yaml. All directory arguments are derived with `dirname` from the
# resolved input locations; the output is written into $(@D).
genrule(
    name = "generated_caffe2_aten_op_headers",
    srcs = [
        "caffe2/contrib/aten/aten_op_template.h",
        "aten/src/ATen/Declarations.yaml",
    ],
    outs = ["caffe2/caffe2/contrib/aten/gen_aten_op.h"],
    cmd = """
    $(location :gen_op) \
        --output_prefix gen_ \
        --install_dir $(@D) \
        --aten_root `dirname $(location aten/src/ATen/Declarations.yaml)`/../.. \
        --template_dir `dirname $(location caffe2/contrib/aten/aten_op_template.h)` \
        --yaml_dir `dirname $(location aten/src/ATen/Declarations.yaml)`""",
    tools = [":gen_op"],
)
1296

1297
# Header-only target exposing the caffe2 headers (plus `modules/**/*.h`).
# caffe2/core/macros.h is excluded from the glob; `:caffe2_core_macros` is listed
# in deps instead. CUDA builds additionally pick up `*.cuh` files and
# caffe2/image headers, and the generated ATen op header is always appended.
cc_library(
    name = "caffe2_headers",
    hdrs = glob(
        [
            "caffe2/contrib/aten/*.h",
            "caffe2/contrib/gloo/*.h",
            "caffe2/core/*.h",
            "caffe2/core/nomnigraph/include/nomnigraph/Converters/*.h",
            "caffe2/core/nomnigraph/include/nomnigraph/Generated/*.h",
            "caffe2/core/nomnigraph/include/nomnigraph/Graph/*.h",
            "caffe2/core/nomnigraph/include/nomnigraph/Representations/*.h",
            "caffe2/core/nomnigraph/include/nomnigraph/Support/*.h",
            "caffe2/core/nomnigraph/include/nomnigraph/Transformations/*.h",
            "caffe2/core/nomnigraph/tests/*.h",
            "caffe2/db/*.h",
            "caffe2/distributed/*.h",
            "caffe2/ideep/*.h",
            "caffe2/ideep/operators/*.h",
            "caffe2/ideep/operators/quantization/*.h",
            "caffe2/ideep/utils/*.h",
            "caffe2/onnx/*.h",
            "caffe2/operators/*.h",
            "caffe2/operators/rnn/*.h",
            "caffe2/opt/*.h",
            "caffe2/perfkernels/*.h",
            "caffe2/predictor/*.h",
            "caffe2/predictor/emulator/*.h",
            "caffe2/quantization/server/*.h",
            "caffe2/queue/*.h",
            "caffe2/serialize/*.h",
            "caffe2/sgd/*.h",
            "caffe2/share/contrib/depthwise/*.h",
            "caffe2/transforms/*.h",
            "caffe2/utils/*.h",
            "caffe2/utils/math/*.h",
            "caffe2/utils/threadpool/*.h",
            "modules/**/*.h",
        ],
        exclude = [
            "caffe2/core/macros.h",
        ],
    ) + if_cuda(glob([
        "caffe2/**/*.cuh",
        "caffe2/image/*.h",
    ])) + [":generated_caffe2_aten_op_headers"],
    copts = CAFFE2_COPTS,
    includes = [
        "caffe2/contrib/aten",
        "caffe2/core/nomnigraph/include",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_core_macros",
        ":caffe2_for_aten_headers",
        "//caffe2/proto:caffe2_pb",
        "//caffe2/proto:cc_proto",
    ],
)
1355

1356
# DNNLOWP quantization sources that are compiled with AVX2/FMA/F16C/XSAVE
# enabled, kept in their own library so those flags apply only to these files.
cc_library(
    name = "caffe2_dnnlowp_avx2_ops",
    srcs = [
        "caffe2/quantization/server/elementwise_sum_dnnlowp_op_avx2.cc",
        "caffe2/quantization/server/fully_connected_fake_lowp_op_avx2.cc",
        "caffe2/quantization/server/group_norm_dnnlowp_op_avx2.cc",
        "caffe2/quantization/server/norm_minimization_avx2.cc",
        "caffe2/quantization/server/pool_dnnlowp_op_avx2.cc",
        "caffe2/quantization/server/relu_dnnlowp_op_avx2.cc",
        "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op_avx2.cc",
        "caffe2/quantization/server/transpose.cc",
    ],
    copts = CAFFE2_COPTS + [
        "-mf16c",
        "-mavx2",
        "-mfma",
        "-mxsave",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_headers",
        "@fbgemm",
    ],
    alwayslink = True,
)
1381

1382
# Main caffe2 library: three individually listed sources plus the per-directory
# `caffe2_*_srcs` filegroups. The trailing `if_cuda(...)` picks the CUDA
# dependency set (first list) or the CPU-only set (second list).
cc_library(
    name = "caffe2",
    srcs = [
        "caffe2/db/create_db_op.cc",
        "caffe2/db/protodb.cc",
        "caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc",
        ":caffe2_contrib_srcs",
        ":caffe2_core_srcs",
        ":caffe2_distributed_srcs",
        ":caffe2_ideep_srcs",
        ":caffe2_onnx_srcs",
        ":caffe2_operators_srcs",
        ":caffe2_opt_srcs",
        ":caffe2_perfkernels_srcs",
        ":caffe2_predictor_srcs",
        ":caffe2_quantization_srcs",
        ":caffe2_queue_srcs",
        ":caffe2_serialize_srcs",
        ":caffe2_sgd_srcs",
        ":caffe2_transforms_srcs",
        ":caffe2_utils_srcs",
    ],
    copts = CAFFE2_COPTS + ["-mf16c"],
    linkstatic = 1,
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_core_macros",
        ":caffe2_dnnlowp_avx2_ops",
        ":caffe2_headers",
        ":caffe2_perfkernels_avx",
        ":caffe2_perfkernels_avx2",
        ":caffe2_perfkernels_avx512",
        "//caffe2/proto:caffe2_pb",
        "//caffe2/proto:cc_proto",
        "//third_party/miniz-2.1.0:miniz",
        "@com_google_protobuf//:protobuf",
        "@eigen",
        "@fbgemm//:fbgemm_src_headers",
        "@fmt",
        "@foxi",
        "@gloo",
        "@onnx",
    ] + if_cuda(
        [
            ":caffe2_cuda_cpp",
            ":aten_cuda",
            "@tensorpipe//:tensorpipe_cuda",
        ],
        [
            ":aten",
            "@tensorpipe//:tensorpipe_cpu",
        ],
    ),
    alwayslink = True,
)
1437

1438
# Host-side C++ (`*_gpu.cc` / `*_cudnn.cc`) part of the CUDA build; depends on
# the `.cu` kernels in `:caffe2_cuda`.
cc_library(
    name = "caffe2_cuda_cpp",
    srcs = [":caffe2_cuda_cpp_srcs"],
    copts = CAFFE2_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_cuda",
        ":caffe2_headers",
    ],
    alwayslink = True,
)
1449

1450
# CUDA kernels for caffe2, plus two c10d `.cu` files from torch/csrc.
cu_library(
    name = "caffe2_cuda",
    # one may think that `quantization_gpu.cu` could be a separate kernel,
    # however that leads to de-registration problem that's described in
    # https://github.com/pytorch/pytorch/issues/79236
    # To solve it we add it into the `caffe2_cuda`,
    # this is also aligned with the CMake build.
    srcs = [":caffe2_cu_srcs"] + [
        "torch/csrc/distributed/c10d/intra_node_comm.cu",
        "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
    ],
    copts = CAFFE2_COPTS + torch_cuda_half_options,
    visibility = ["//visibility:public"],
    deps = [
        ":aten",
        ":caffe2_headers",
        "@cuda//:cublas",
        "@cuda//:curand",
        "@cudnn",
        "@eigen",
        "@gloo",
        "@tensorpipe//:tensorpipe_cuda",
    ],
    alwayslink = True,
)
1475

1476
# Base compiler flags shared by the `caffe2_perfkernels_avx*` libraries; each of
# those appends its own `-m<isa>` flags on top of this list.
PERF_COPTS = [
    "-DHAVE_AVX_CPU_DEFINITION",
    "-DHAVE_AVX2_CPU_DEFINITION",
    "-DENABLE_ALIAS=1",
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DHAVE_MMAP=1",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-DSLEEF_STATIC_LIBS=1",
    # NOTE(review): "TH_BALS_MKL" looks like a misspelling of "TH_BLAS_MKL".
    # Left unchanged because correcting it would start defining a macro that is
    # currently not defined for these targets; confirm intent before renaming.
    "-DTH_BALS_MKL",
    "-D_FILE_OFFSET_BITS=64",
    "-DUSE_FBGEMM",
    "-fvisibility-inlines-hidden",
    "-Wunused-parameter",
    "-fno-math-errno",
    "-fno-trapping-math",
    "-mf16c",
]
1494

1495
# Headers exposed as `hdrs` by each of the `caffe2_perfkernels_avx*` libraries.
PERF_HEADERS = glob([
    "caffe2/perfkernels/*.h",
    "caffe2/core/*.h",
])
1499

1500
# Perfkernel sources matching `*_avx.cc`, compiled with `-mavx`.
cc_library(
    name = "caffe2_perfkernels_avx",
    srcs = glob([
        "caffe2/perfkernels/*_avx.cc",
    ]),
    hdrs = PERF_HEADERS,
    copts = PERF_COPTS + [
        "-mavx",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_headers",
        "//c10",
    ],
    alwayslink = True,
)
1516

1517
# Perfkernel sources matching `*_avx2.cc`, compiled with AVX2, FMA and AVX.
cc_library(
    name = "caffe2_perfkernels_avx2",
    srcs = glob([
        "caffe2/perfkernels/*_avx2.cc",
    ]),
    hdrs = PERF_HEADERS,
    copts = PERF_COPTS + [
        "-mavx2",
        "-mfma",
        "-mavx",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_headers",
        "//c10",
    ],
    alwayslink = True,
)
1535

1536
# The single AVX-512 perfkernel source, compiled with the AVX-512 F/DQ/VL
# extensions in addition to the AVX2/FMA/AVX flags.
cc_library(
    name = "caffe2_perfkernels_avx512",
    srcs = [
        "caffe2/perfkernels/common_avx512.cc",
    ],
    hdrs = PERF_HEADERS,
    copts = PERF_COPTS + [
        "-mavx512f",
        "-mavx512dq",
        "-mavx512vl",
        "-mavx2",
        "-mfma",
        "-mavx",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_headers",
        "//c10",
    ],
    alwayslink = True,
)
1557

1558
# torch
# Headers directly under torch/csrc/cuda/. `:torch_headers` adds them only via
# `if_cuda(...)` and excludes them from its general header glob.
torch_cuda_headers = glob(["torch/csrc/cuda/*.h"])
1560

1561
# Header target for torch: the torch/ and torch/csrc headers, the c10d `.hpp`
# files and libshm headers, plus generated autograd files and `:version_h`.
# Checked-in `generated/` headers and the CUDA headers are excluded from the
# glob; the CUDA headers are re-added only for CUDA builds.
cc_library(
    name = "torch_headers",
    hdrs = if_cuda(
        torch_cuda_headers,
    ) + glob(
        [
            "torch/*.h",
            "torch/csrc/**/*.h",
            "torch/csrc/distributed/c10d/*.hpp",
            "torch/lib/libshm/*.h",
        ],
        exclude = [
            "torch/csrc/*/generated/*.h",
        ] + torch_cuda_headers,
    ) + GENERATED_AUTOGRAD_CPP + [":version_h"],
    includes = [
        "third_party/kineto/libkineto/include",
        "torch/csrc",
        "torch/csrc/api/include",
        "torch/csrc/distributed",
        "torch/lib",
        "torch/lib/libshm",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":aten_headers",
        ":caffe2_headers",
        "//c10",
        "@com_github_google_flatbuffers//:flatbuffers",
        "@local_config_python//:python_headers",
        "@onnx",
    ],
    alwayslink = True,
)
1595

1596
# Compiler flags for the `:torch` library: the file-wide COMMON_COPTS plus
# torch-specific defines and warning/visibility/floating-point options.
TORCH_COPTS = COMMON_COPTS + [
    "-Dtorch_EXPORTS",
    "-DHAVE_AVX_CPU_DEFINITION",
    "-DHAVE_AVX2_CPU_DEFINITION",
    "-DCAFFE2_USE_GLOO",
    "-fvisibility-inlines-hidden",
    # Trailing space removed from this flag; the option itself is unchanged.
    "-fno-math-errno",
    "-fno-trapping-math",
    "-Wno-error=unused-function",
]
1606

1607
# De-duplicated list of all torch source files: the concatenated source lists
# are used as dict keys (values are unused placeholders) so that a file present
# in more than one list appears only once.
torch_sources = {
    k: ""
    for k in (
        libtorch_core_sources +
        libtorch_distributed_sources +
        torch_cpp_srcs +
        libtorch_extra_sources +
        jit_core_sources +
        lazy_tensor_ts_sources +
        GENERATED_AUTOGRAD_CPP
    )
}.keys()
1619

1620
# The main torch library. CUDA builds add `libtorch_cuda_sources`, minus the two
# NCCL files and the two c10d `.cu` files; the latter are listed in the `srcs`
# of `:caffe2_cuda` instead.
cc_library(
    name = "torch",
    srcs = if_cuda(glob(
        libtorch_cuda_sources,
        exclude = [
            "torch/csrc/cuda/python_nccl.cpp",
            "torch/csrc/cuda/nccl.cpp",
            "torch/csrc/distributed/c10d/intra_node_comm.cu",
            "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
        ],
    )) + torch_sources,
    copts = TORCH_COPTS,
    defines = [
        "CAFFE2_NIGHTLY_VERSION=20200115",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2",
        ":torch_headers",
        "//caffe2/proto:torch_cc_proto",
        "@kineto",
    ] + if_cuda([
        "@cuda//:nvToolsExt",
        "@cutlass",
    ]),
    alwayslink = True,
)
1647

1648
# Library built from every `.cpp` file in torch/lib/libshm/.
cc_library(
    name = "shm",
    srcs = glob(["torch/lib/libshm/*.cpp"]),
    deps = [
        ":torch",
    ],
)
1655

1656
# Umbrella header target: every `.h` / `.cuh` file in this package plus the
# generated C++ code, with the torch include roots.
cc_library(
    name = "libtorch_headers",
    hdrs = glob([
        "**/*.h",
        "**/*.cuh",
    ]) + [
        # We need the filegroup here because the raw list causes Bazel
        # to see duplicate files. It knows how to deduplicate with the
        # filegroup.
        ":cpp_generated_code",
    ],
    includes = [
        "torch/csrc/api/include",
        "torch/csrc/distributed",
        "torch/lib",
        "torch/lib/libshm",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":torch_headers",
    ],
)
1678

1679
# Python-binding sources for torch. The CUDA and distributed Python sources are
# added only for CUDA builds. The `torch/csrc/generic/*.cpp` files are listed
# under `hdrs`, not `srcs`.
cc_library(
    name = "torch_python",
    srcs = libtorch_python_core_sources
        + if_cuda(libtorch_python_cuda_sources)
        + if_cuda(libtorch_python_distributed_sources)
        + GENERATED_AUTOGRAD_PYTHON,
    hdrs = glob([
        "torch/csrc/generic/*.cpp",
    ]),
    copts = COMMON_COPTS + if_cuda(["-DUSE_CUDA=1"]),
    deps = [
        ":torch",
        ":shm",
        "@pybind11",
    ],
)
1695

1696
# The `torch/_C` Python extension module, built from stub.c and linked against
# `:torch_python` and `:aten_nvrtc`.
pybind_extension(
    name = "torch/_C",
    srcs = ["torch/csrc/stub.c"],
    deps = [
        ":torch_python",
        ":aten_nvrtc",
    ],
)
1704

1705
# C++ library built from the headers and sources in functorch/csrc/dim/.
cc_library(
    name = "functorch",
    hdrs = glob([
        "functorch/csrc/dim/*.h",
    ]),
    srcs = glob([
        "functorch/csrc/dim/*.cpp",
    ]),
    deps = [
        ":aten_nvrtc",
        ":torch_python",
        "@pybind11",
    ],
)
1719

1720
# The `functorch/_C` Python extension module; TORCH_EXTENSION_NAME is defined
# as `_C` for its single source file.
pybind_extension(
    name = "functorch/_C",
    copts = [
        "-DTORCH_EXTENSION_NAME=_C",
    ],
    srcs = [
        "functorch/csrc/init_dim_only.cpp",
    ],
    deps = [
        ":functorch",
        ":torch_python",
        ":aten_nvrtc",
    ],
)
1734

1735
# The torch_shm_manager executable, built from libshm's manager.cpp on top of
# `:shm`; shipped as a data dependency of `:pytorch_py`.
cc_binary(
    name = "torch/bin/torch_shm_manager",
    srcs = [
        "torch/lib/libshm/manager.cpp",
    ],
    deps = [
        ":shm",
    ],
    linkstatic = False,
)
1745

1746
# Produces torch/version.py from its template by filling in the
# {{VERSION}} and {{CUDA_VERSION}} placeholders.
template_rule(
    name = "gen_version_py",
    src = ":torch/version.py.tpl",
    out = "torch/version.py",
    substitutions = if_cuda(
        # CUDA build: report 11.2 by default, because Torchvision
        # otherwise complains about an incompatible version.
        {
            "{{CUDA_VERSION}}": "11.2",
            "{{VERSION}}": "2.0.0",
        },
        # Non-CUDA build: no CUDA version is reported.
        {
            "{{CUDA_VERSION}}": "None",
            "{{VERSION}}": "2.0.0",
        },
    ),
)
1759

1760
# The Python side of the package: every .py file under torch/ and
# functorch/, plus the native artifacts it needs at runtime.
py_library(
    name = "pytorch_py",
    srcs = (
        # The checked-in path torch/version.py is excluded from the glob;
        # the generated :torch/version.py is added explicitly instead.
        glob(
            ["torch/**/*.py"],
            exclude = ["torch/version.py"],
        ) +
        [":torch/version.py"] +
        glob(["functorch/**/*.py"])
    ),
    # Extension modules and the shm manager binary must be present in
    # the runfiles next to the Python sources.
    data = [
        ":torch/_C.so",
        ":functorch/_C.so",
        ":torch/bin/torch_shm_manager",
    ],
    visibility = ["//visibility:public"],
    deps = [
        rules.requirement("future"),
        rules.requirement("numpy"),
        rules.requirement("pyyaml"),
        rules.requirement("requests"),
        rules.requirement("setuptools"),
        rules.requirement("six"),
        rules.requirement("sympy"),
        rules.requirement("typing_extensions"),
        "//torchgen",
    ],
)
1781

1782
# C++ API tests.
#
# Shared support code and baseline headers that the C++ API test targets
# depend on. Test-only.
cc_library(
    name = "test_support",
    testonly = True,
    srcs = ["test/cpp/api/support.cpp"],
    hdrs = [
        "test/cpp/api/init_baseline.h",
        "test/cpp/api/optim_baseline.h",
        "test/cpp/api/support.h",
        "test/cpp/common/support.h",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)
1800

1801
# The Torch integration tests need a labeled data set from the MNIST
# database:
# http://yann.lecun.com/exdb/mnist/

# Sources in test/cpp/api that each get their own generated cc_test.
# imethod.cpp and integration.cpp are left out of this list.
cpp_api_tests = glob(
    include = ["test/cpp/api/*.cpp"],
    exclude = [
        "test/cpp/api/imethod.cpp",
        "test/cpp/api/integration.cpp",
    ],
)
1811

1812
# Integration test for the C++ API. It receives the outputs of
# :download_mnist as runtime data and is tagged as requiring a GPU.
cc_test(
    name = "integration_test",
    size = "medium",
    srcs = ["test/cpp/api/integration.cpp"],
    data = [":download_mnist"],
    tags = ["gpu-required"],
    deps = [
        ":test_support",
        "@com_google_googletest//:gtest_main",
    ],
)
1827

1828
# One cc_test per file in cpp_api_tests. The target name is the file's
# base name without extension, with "-" replaced by "_" and "_test"
# appended.
[
    cc_test(
        name = "{}_test".format(
            paths.split_extension(paths.basename(test_src))[0].replace("-", "_"),
        ),
        size = "medium",
        srcs = [test_src],
        deps = [
            ":test_support",
            "@com_google_googletest//:gtest_main",
        ],
    )
    for test_src in cpp_api_tests
]
1840

1841
# Suite of C++ API tests. The list is maintained by hand; it is not
# derived from the cpp_api_tests glob.
test_suite(
    name = "api_tests",
    tests = [
        "any_test",
        "autograd_test",
        "dataloader_test",
        "enum_test",
        "expanding_array_test",
        "functional_test",
        "init_test",
        "integration_test",
        "jit_test",
        "memory_test",
        "misc_test",
        "module_test",
        "modulelist_test",
        "modules_test",
        "nn_utils_test",
        "optim_test",
        "ordered_dict_test",
        "rnn_test",
        "sequential_test",
        "serialize_test",
        "static_test",
        "tensor_options_test",
        "tensor_test",
        "torch_include_test",
    ],
)
1870

1871
# Distributed autograd tests.
#
# Tagged "exclusive" and "gpu-required".
cc_test(
    name = "torch_dist_autograd_test",
    size = "small",
    srcs = [
        "test/cpp/dist_autograd/test_dist_autograd.cpp",
    ],
    tags = [
        "exclusive",
        "gpu-required",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)
1885

1886
# JIT tests.
#
# The individual unit tests require custom registration, so rather than
# one target per file this mimics the CMake build by globbing the JIT
# and tensorexpr test sources together into a single test binary.
cc_test(
    name = "jit_tests",
    size = "small",
    srcs = glob(
        include = [
            "test/cpp/jit/*.cpp",
            "test/cpp/jit/*.h",
            "test/cpp/tensorexpr/*.cpp",
            "test/cpp/tensorexpr/*.h",
        ],
        exclude = [
            # <pybind11/embed.h> is not found in the OSS build, so this
            # file is skipped.
            "test/cpp/jit/test_exception.cpp",
        ],
    ),
    linkstatic = True,
    tags = [
        "exclusive",
        "gpu-required",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)
1914

1915
# Lazy tensor tests, globbed into a single statically linked test binary.
cc_test(
    name = "lazy_tests",
    size = "small",
    srcs = glob(
        include = [
            "test/cpp/lazy/*.cpp",
            "test/cpp/lazy/*.h",
        ],
        exclude = [
            # These depend on the generated LazyIr.h, which is not yet
            # available in the bazel build.
            "test/cpp/lazy/test_ir.cpp",
            "test/cpp/lazy/test_lazy_ops.cpp",
            "test/cpp/lazy/test_lazy_ops_util.cpp",
        ],
    ),
    linkstatic = True,
    tags = ["exclusive"],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)
1939

1940
# Python API tests.
#
# Runs test/_test_bazel.py against the :pytorch_py library.
py_test(
    name = "test_bazel",
    srcs = [
        "test/_test_bazel.py",
    ],
    main = "test/_test_bazel.py",
    deps = [
        ":pytorch_py",
    ],
)
1948

1949
# Aggregate suite: the C++ API suite, the JIT tests, the distributed
# autograd test and the c10 tests.
# NOTE(review): `lazy_tests` and `test_bazel` are defined in this file
# but are not listed here -- confirm whether that is intentional.
test_suite(
    name = "all_tests",
    tests = [
        "api_tests",
        "jit_tests",
        "torch_dist_autograd_test",
        "//c10/test:tests",
    ],
)
1959

1960
# An internal genrule that we are converging with refers to these files
# as if they lived directly in this package, so each one is aliased
# under its base name for compatibility.

[
    alias(
        name = paths.basename(aliased_file),
        actual = aliased_file,
    )
    for aliased_file in [
        "aten/src/ATen/templates/DispatchKeyNativeFunctions.cpp",
        "aten/src/ATen/templates/DispatchKeyNativeFunctions.h",
        "aten/src/ATen/templates/LazyIr.h",
        "aten/src/ATen/templates/LazyNonNativeIr.h",
        "aten/src/ATen/templates/RegisterDispatchKey.cpp",
        "aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
        "aten/src/ATen/native/native_functions.yaml",
        "aten/src/ATen/native/tags.yaml",
        "aten/src/ATen/native/ts_native_functions.yaml",
        "torch/csrc/lazy/core/shape_inference.h",
        "torch/csrc/lazy/ts_backend/ts_native_functions.cpp",
    ]
]
1983

1984
# Runs tools/download_mnist.py to populate the four MNIST files under
# <rule output dir>/mnist. These outputs are consumed as `data` by
# :integration_test.
# NOTE(review): the script presumably fetches the data over the network,
# which makes this action non-hermetic -- confirm sandbox/network policy.
genrule(
    name = "download_mnist",
    srcs = ["//:tools/download_mnist.py"],
    outs = [
        "mnist/train-images-idx3-ubyte",
        "mnist/train-labels-idx1-ubyte",
        "mnist/t10k-images-idx3-ubyte",
        "mnist/t10k-labels-idx1-ubyte",
    ],
    # Resolve the script through $(location ...) instead of hard-coding
    # its workspace-relative path. The expansion is identical when this
    # is the main repository, and stays correct when the package is
    # built as an external repository (where sources live under
    # external/<repo>/...).
    cmd = "python3 $(location //:tools/download_mnist.py) -d $(RULEDIR)/mnist",
)
1995
pytorch

Использование cookies