# pytorch/BUILD.bazel — top-level Bazel build definitions for PyTorch.
# Starlark rule/macro imports.
# NOTE(review): the original file bound the symbol "rules" twice — once from
# "@pytorch//:tools/bazel.bzl" and once from "//:tools/bazel.bzl", which are
# the same file referenced through different repository prefixes.  Starlark
# forbids rebinding a name introduced by load(), so the duplicate load is
# removed here; the in-repo "//" form is kept.
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
load("@rules_python//python:defs.bzl", "py_library", "py_test")
load("@pytorch//third_party:substitution.bzl", "header_template_rule", "template_rule")
load("@pytorch//tools/rules:cu.bzl", "cu_library")
load("@pytorch//tools/config:defs.bzl", "if_cuda")
load("@pytorch//:aten.bzl", "generate_aten", "intern_build_aten_ops")
load(":build.bzl", "GENERATED_AUTOGRAD_CPP", "GENERATED_AUTOGRAD_PYTHON", "define_targets")
load(":build_variables.bzl", "jit_core_sources", "lazy_tensor_ts_sources", "libtorch_core_sources", "libtorch_cuda_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "libtorch_python_core_sources", "torch_cpp_srcs", "libtorch_python_cuda_sources", "libtorch_python_distributed_sources")
load(":ufunc_defs.bzl", "aten_ufunc_generated_cpu_kernel_sources", "aten_ufunc_generated_cpu_sources", "aten_ufunc_generated_cuda_sources")
load("//:tools/bazel.bzl", "rules")

# Instantiate code-generation targets shared with other build systems.
define_targets(rules = rules)
16
# Compiler flags shared by every C++ target in this file; CUDA-only defines
# are appended via if_cuda() so CPU builds are unaffected.
COMMON_COPTS = [
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DHAVE_MMAP=1",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-D_FILE_OFFSET_BITS=64",
    "-DUSE_FBGEMM",
    "-DUSE_DISTRIBUTED",
    "-DAT_PER_OPERATOR_HEADERS",
    "-DATEN_THREADING=NATIVE",
    "-DNO_CUDNN_DESTROY_HANDLE",
] + if_cuda([
    "-DUSE_CUDA",
    "-DUSE_CUDNN",
    # TODO: This should be passed only when building for CUDA-11.5 or newer
    # use cub in a safe manner, see:
    # https://github.com/pytorch/pytorch/pull/55292
    "-DCUB_WRAPPED_NAMESPACE=at_cuda_detail",
])
36
# Inputs to ATen code generation: operator schemas plus all code templates.
aten_generation_srcs = ["aten/src/ATen/native/native_functions.yaml"] + ["aten/src/ATen/native/tags.yaml"] + glob(["aten/src/ATen/templates/**"])

# Files emitted by the ATen code generator for CPU-only builds (dispatch
# registrations, operator headers, and sharded Operators_*.cpp).
generated_cpu_cpp = [
    "aten/src/ATen/RegisterBackendSelect.cpp",
    "aten/src/ATen/RegisterCPU.cpp",
    "aten/src/ATen/RegisterFunctionalization_0.cpp",
    "aten/src/ATen/RegisterFunctionalization_1.cpp",
    "aten/src/ATen/RegisterFunctionalization_2.cpp",
    "aten/src/ATen/RegisterFunctionalization_3.cpp",
    # "aten/src/ATen/RegisterFunctionalizationEverything.cpp",
    "aten/src/ATen/RegisterMkldnnCPU.cpp",
    "aten/src/ATen/RegisterNestedTensorCPU.cpp",
    "aten/src/ATen/RegisterQuantizedCPU.cpp",
    "aten/src/ATen/RegisterSparseCPU.cpp",
    "aten/src/ATen/RegisterSparseCsrCPU.cpp",
    "aten/src/ATen/RegisterZeroTensor.cpp",
    "aten/src/ATen/RegisterCompositeImplicitAutograd.cpp",
    "aten/src/ATen/RegisterCompositeImplicitAutogradNestedTensor.cpp",
    "aten/src/ATen/RegisterCompositeExplicitAutograd.cpp",
    "aten/src/ATen/RegisterCompositeExplicitAutogradNonFunctional.cpp",
    "aten/src/ATen/RegisterMeta.cpp",
    "aten/src/ATen/RegisterSparseMeta.cpp",
    "aten/src/ATen/RegisterQuantizedMeta.cpp",
    "aten/src/ATen/RegisterNestedTensorMeta.cpp",
    "aten/src/ATen/RegisterSchema.cpp",
    "aten/src/ATen/CPUFunctions.h",
    "aten/src/ATen/CPUFunctions_inl.h",
    "aten/src/ATen/CompositeExplicitAutogradFunctions.h",
    "aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
    "aten/src/ATen/CompositeExplicitAutogradNonFunctionalFunctions.h",
    "aten/src/ATen/CompositeExplicitAutogradNonFunctionalFunctions_inl.h",
    "aten/src/ATen/CompositeImplicitAutogradFunctions.h",
    "aten/src/ATen/CompositeImplicitAutogradFunctions_inl.h",
    "aten/src/ATen/CompositeImplicitAutogradNestedTensorFunctions.h",
    "aten/src/ATen/CompositeImplicitAutogradNestedTensorFunctions_inl.h",
    "aten/src/ATen/CompositeViewCopyKernels.cpp",
    "aten/src/ATen/FunctionalInverses.h",
    "aten/src/ATen/Functions.h",
    "aten/src/ATen/Functions.cpp",
    "aten/src/ATen/RedispatchFunctions.h",
    "aten/src/ATen/Operators.h",
    "aten/src/ATen/Operators_0.cpp",
    "aten/src/ATen/Operators_1.cpp",
    "aten/src/ATen/Operators_2.cpp",
    "aten/src/ATen/Operators_3.cpp",
    "aten/src/ATen/Operators_4.cpp",
    "aten/src/ATen/NativeFunctions.h",
    "aten/src/ATen/MetaFunctions.h",
    "aten/src/ATen/MetaFunctions_inl.h",
    "aten/src/ATen/MethodOperators.h",
    "aten/src/ATen/NativeMetaFunctions.h",
    "aten/src/ATen/RegistrationDeclarations.h",
    "aten/src/ATen/VmapGeneratedPlumbing.h",
    "aten/src/ATen/core/aten_interned_strings.h",
    "aten/src/ATen/core/enum_tag.h",
    "aten/src/ATen/core/TensorBody.h",
    "aten/src/ATen/core/TensorMethods.cpp",
    "aten/src/ATen/core/ATenOpList.cpp",
]
96
# Additional generator outputs that are only built when CUDA is enabled.
generated_cuda_cpp = [
    "aten/src/ATen/CUDAFunctions.h",
    "aten/src/ATen/CUDAFunctions_inl.h",
    "aten/src/ATen/RegisterCUDA.cpp",
    "aten/src/ATen/RegisterNestedTensorCUDA.cpp",
    "aten/src/ATen/RegisterQuantizedCUDA.cpp",
    "aten/src/ATen/RegisterSparseCUDA.cpp",
    "aten/src/ATen/RegisterSparseCsrCUDA.cpp",
]

# Runs //torchgen:gen over the YAML schemas/templates to produce all of the
# CPU, CUDA, and ufunc sources declared above, plus Declarations.yaml.
generate_aten(
    name = "generated_aten_cpp",
    srcs = aten_generation_srcs,
    outs = (
        generated_cpu_cpp +
        generated_cuda_cpp +
        aten_ufunc_generated_cpu_sources("aten/src/ATen/{}") +
        aten_ufunc_generated_cpu_kernel_sources("aten/src/ATen/{}") +
        aten_ufunc_generated_cuda_sources("aten/src/ATen/{}") + [
            "aten/src/ATen/Declarations.yaml",
        ]
    ),
    generator = "//torchgen:gen",
)
121
# Autograd-generated C++ sources; the data dependency forces the
# :generate-code rule to run before anything consuming this filegroup.
filegroup(
    name = "cpp_generated_code",
    srcs = GENERATED_AUTOGRAD_CPP,
    data = [":generate-code"],
)

# ATen
# Hand-written top-level ATen sources (core, functorch, detail, cpu).
filegroup(
    name = "aten_base_cpp",
    srcs = glob([
        "aten/src/ATen/*.cpp",
        "aten/src/ATen/functorch/*.cpp",
        "aten/src/ATen/detail/*.cpp",
        "aten/src/ATen/cpu/*.cpp",
    ]),
)
138
# ATen core sources, excluding unit tests.
filegroup(
    name = "ATen_CORE_SRCS",
    srcs = glob(
        [
            "aten/src/ATen/core/**/*.cpp",
        ],
        exclude = [
            "aten/src/ATen/core/**/*_test.cpp",
        ],
    ),
)

# Native (non-generated) operator implementations.
filegroup(
    name = "aten_native_cpp",
    srcs = glob(["aten/src/ATen/native/*.cpp"]),
)

# Sparse-tensor operator implementations.
filegroup(
    name = "aten_native_sparse_cpp",
    srcs = glob(["aten/src/ATen/native/sparse/*.cpp"]),
)

# Nested-tensor operator implementations.
filegroup(
    name = "aten_native_nested_cpp",
    srcs = glob(["aten/src/ATen/native/nested/*.cpp"]),
)

# Quantized operator implementations (generic + CPU kernels).
filegroup(
    name = "aten_native_quantized_cpp",
    srcs = glob(
        [
            "aten/src/ATen/native/quantized/*.cpp",
            "aten/src/ATen/native/quantized/cpu/*.cpp",
        ],
    ),
)
175
# Transformer (attention) operator implementations.
filegroup(
    name = "aten_native_transformers_cpp",
    srcs = glob(["aten/src/ATen/native/transformers/*.cpp"]),
)

# MKL-backed kernels and MKL integration glue.
filegroup(
    name = "aten_native_mkl_cpp",
    srcs = glob([
        "aten/src/ATen/native/mkl/*.cpp",
        "aten/src/ATen/mkl/*.cpp",
    ]),
)

# MKL-DNN (oneDNN) backed kernels.
filegroup(
    name = "aten_native_mkldnn_cpp",
    srcs = glob(["aten/src/ATen/native/mkldnn/*.cpp"]),
)

# XNNPACK-backed mobile/CPU kernels.
filegroup(
    name = "aten_native_xnnpack",
    srcs = glob(["aten/src/ATen/native/xnnpack/*.cpp"]),
)

# Vulkan backend glue.
filegroup(
    name = "aten_base_vulkan",
    srcs = glob(["aten/src/ATen/vulkan/*.cpp"]),
)

# Metal backend glue.
filegroup(
    name = "aten_base_metal",
    srcs = glob(["aten/src/ATen/metal/*.cpp"]),
)

# Quantized-tensor core sources, excluding unit tests.
filegroup(
    name = "ATen_QUANTIZED_SRCS",
    srcs = glob(
        [
            "aten/src/ATen/quantized/**/*.cpp",
        ],
        exclude = [
            "aten/src/ATen/quantized/**/*_test.cpp",
        ],
    ),
)
220
# Host-compiled (.cpp) CUDA/cuDNN/MIOpen sources.
filegroup(
    name = "aten_cuda_cpp_srcs",
    srcs = glob(
        [
            "aten/src/ATen/cuda/*.cpp",
            "aten/src/ATen/cuda/detail/*.cpp",
            "aten/src/ATen/cuda/tunable/*.cpp",
            "aten/src/ATen/cudnn/*.cpp",
            "aten/src/ATen/native/cuda/*.cpp",
            "aten/src/ATen/native/cuda/linalg/*.cpp",
            "aten/src/ATen/native/cudnn/*.cpp",
            "aten/src/ATen/native/miopen/*.cpp",
            "aten/src/ATen/native/nested/cuda/*.cpp",
            "aten/src/ATen/native/quantized/cuda/*.cpp",
            "aten/src/ATen/native/quantized/cudnn/*.cpp",
            "aten/src/ATen/native/sparse/cuda/*.cpp",
            "aten/src/ATen/native/transformers/cuda/*.cpp",
        ],
    ),
)

# Device-compiled (.cu) sources, including generated CUDA ufunc kernels.
filegroup(
    name = "aten_cu_srcs",
    srcs = glob([
        "aten/src/ATen/cuda/*.cu",
        "aten/src/ATen/cuda/detail/*.cu",
        "aten/src/ATen/native/cuda/*.cu",
        "aten/src/ATen/native/nested/cuda/*.cu",
        "aten/src/ATen/native/quantized/cuda/*.cu",
        "aten/src/ATen/native/sparse/cuda/*.cu",
        "aten/src/ATen/native/transformers/cuda/*.cu",
    ]) + aten_ufunc_generated_cuda_sources("aten/src/ATen/{}"),
    # It's a bit puzzling to me why it's not necessary to declare the
    # target that generates these sources...
)
256
# Expands Config.h.in with this build's feature selections (MKL/MKL-DNN on,
# native threading, BLAS+LAPACK available).
header_template_rule(
    name = "aten_src_ATen_config",
    src = "aten/src/ATen/Config.h.in",
    out = "aten/src/ATen/Config.h",
    include = "aten/src",
    substitutions = {
        "@AT_MKLDNN_ENABLED@": "1",
        "@AT_MKLDNN_ACL_ENABLED@": "0",
        "@AT_MKL_ENABLED@": "1",
        "@AT_MKL_SEQUENTIAL@": "0",
        "@AT_POCKETFFT_ENABLED@": "0",
        "@AT_NNPACK_ENABLED@": "0",
        "@CAFFE2_STATIC_LINK_CUDA_INT@": "0",
        "@AT_BUILD_WITH_BLAS@": "1",
        "@AT_BUILD_WITH_LAPACK@": "1",
        "@AT_PARALLEL_OPENMP@": "0",
        "@AT_PARALLEL_NATIVE@": "1",
        "@AT_BLAS_F2C@": "0",
        "@AT_BLAS_USE_CBLAS_DOT@": "1",
    },
)

# Expands CUDAConfig.h.in: cuDNN enabled, ROCm/MAGMA/cuSPARSELt disabled.
header_template_rule(
    name = "aten_src_ATen_cuda_config",
    src = "aten/src/ATen/cuda/CUDAConfig.h.in",
    out = "aten/src/ATen/cuda/CUDAConfig.h",
    include = "aten/src",
    substitutions = {
        "@AT_CUDNN_ENABLED@": "1",
        "@AT_CUSPARSELT_ENABLED@": "0",
        "@AT_ROCM_ENABLED@": "0",
        "@AT_MAGMA_ENABLED@": "0",
        "@NVCC_FLAGS_EXTRA@": "",
    },
)
292
# Header-only target exposing all ATen headers plus the generated ones.
cc_library(
    name = "aten_headers",
    hdrs = [
        "torch/csrc/Export.h",
        "torch/csrc/jit/frontend/function_schema_parser.h",
    ] + glob(
        [
            "aten/src/**/*.h",
            "aten/src/**/*.hpp",
            "aten/src/ATen/cuda/**/*.cuh",
            "aten/src/ATen/native/**/*.cuh",
            "aten/src/THC/*.cuh",
        ],
    ) + [
        ":aten_src_ATen_config",
        ":generated_aten_cpp",
    ],
    includes = [
        "aten/src",
    ],
    deps = [
        "//c10",
    ],
)

# Extra flags for ATen compilation units (AVX/AVX2 kernels available,
# relaxed FP math error handling).
ATEN_COPTS = COMMON_COPTS + [
    "-DCAFFE2_BUILD_MAIN_LIBS",
    "-DHAVE_AVX_CPU_DEFINITION",
    "-DHAVE_AVX2_CPU_DEFINITION",
    "-fvisibility-inlines-hidden",
    "-fno-math-errno",
    "-fno-trapping-math",
]

# Builds the per-CPU-capability copies of the ATen op kernels.
intern_build_aten_ops(
    copts = ATEN_COPTS,
    extra_impls = aten_ufunc_generated_cpu_kernel_sources("aten/src/ATen/{}"),
    deps = [
        ":aten_headers",
        "@fbgemm",
        "@mkl",
        "@sleef",
    ],
)
337
# The main CPU ATen library: all hand-written source filegroups plus the
# generated CPU registration/ufunc sources.
cc_library(
    name = "aten",
    srcs = [
        ":ATen_CORE_SRCS",
        ":ATen_QUANTIZED_SRCS",
        ":aten_base_cpp",
        ":aten_base_metal",
        ":aten_base_vulkan",
        ":aten_native_cpp",
        ":aten_native_mkl_cpp",
        ":aten_native_mkldnn_cpp",
        ":aten_native_nested_cpp",
        ":aten_native_quantized_cpp",
        ":aten_native_sparse_cpp",
        ":aten_native_transformers_cpp",
        ":aten_native_xnnpack",
        ":aten_src_ATen_config",
    ] + generated_cpu_cpp + aten_ufunc_generated_cpu_sources("aten/src/ATen/{}"),
    copts = ATEN_COPTS,
    linkopts = [
        "-ldl",
    ],
    # The NVRTC stub shared library is only shipped alongside CUDA builds.
    data = if_cuda(
        [":libcaffe2_nvrtc.so"],
        [],
    ),
    visibility = ["//visibility:public"],
    deps = [
        ":ATen_CPU",
        ":aten_headers",
        ":caffe2_for_aten_headers",
        ":torch_headers",
        "@fbgemm",
        "@ideep",
    ],
    # Keep registration translation units even if nothing references them.
    alwayslink = True,
)
375
# NVRTC stub code, linked statically into the libcaffe2_nvrtc.so below.
cc_library(
    name = "aten_nvrtc",
    srcs = glob([
        "aten/src/ATen/cuda/nvrtc_stub/*.cpp",
    ]),
    copts = ATEN_COPTS,
    linkstatic = True,
    visibility = ["//visibility:public"],
    deps = [
        ":aten_headers",
        "//c10",
        "@cuda",
        "@cuda//:cuda_driver",
        "@cuda//:nvrtc",
    ],
    alwayslink = True,
)

# Shared library wrapper around the NVRTC stub, loaded at runtime by :aten.
cc_binary(
    name = "libcaffe2_nvrtc.so",
    linkshared = True,
    visibility = ["//visibility:public"],
    deps = [
        ":aten_nvrtc",
    ],
)
402
# Host-compiled part of the CUDA ATen backend (generated + hand-written .cpp).
cc_library(
    name = "aten_cuda_cpp",
    srcs = [":aten_cuda_cpp_srcs"] + generated_cuda_cpp,
    hdrs = [":aten_src_ATen_cuda_config"],
    copts = ATEN_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":aten",
        "@cuda",
        "@cuda//:cusolver",
        "@cuda//:nvrtc",
        "@cudnn",
        "@cudnn_frontend",
    ],
    alwayslink = True,
)

# Disable implicit half/bfloat16 operator overloads in device code so ATen's
# own conversions are used instead.
torch_cuda_half_options = [
    "-DCUDA_HAS_FP16=1",
    "-D__CUDA_NO_HALF_OPERATORS__",
    "-D__CUDA_NO_HALF_CONVERSIONS__",
    "-D__CUDA_NO_BFLOAT16_CONVERSIONS__",
    "-D__CUDA_NO_HALF2_OPERATORS__",
]

# Device-compiled (.cu) part of the CUDA ATen backend.
cu_library(
    name = "aten_cuda",
    srcs = [":aten_cu_srcs"],
    copts = ATEN_COPTS + torch_cuda_half_options,
    visibility = ["//visibility:public"],
    deps = [
        ":aten_cuda_cpp",
        "//c10/util:bit_cast",
        "@cuda//:cublas",
        "@cuda//:cufft",
        "@cuda//:cusparse",
        "@cutlass",
    ],
    alwayslink = True,
)
443
# caffe2
# Flags for the remaining caffe2 sources.
CAFFE2_COPTS = COMMON_COPTS + [
    "-Dcaffe2_EXPORTS",
    "-DCAFFE2_USE_CUDNN",
    "-DCAFFE2_BUILD_MAIN_LIB",
    "-fvisibility-inlines-hidden",
    "-fno-math-errno",
    "-fno-trapping-math",
]

# Minimal caffe2 core still used by torch.
filegroup(
    name = "caffe2_core_srcs",
    srcs = [
        "caffe2/core/common.cc",
    ],
)

# Generic (non-SIMD) perfkernels entry point; AVX/AVX2 variants are built
# by the dedicated cc_library targets below.
filegroup(
    name = "caffe2_perfkernels_srcs",
    srcs = [
        "caffe2/perfkernels/embedding_lookup_idx.cc",
    ],
)

# Model/zip serialization sources.
filegroup(
    name = "caffe2_serialize_srcs",
    srcs = [
        "caffe2/serialize/file_adapter.cc",
        "caffe2/serialize/inline_container.cc",
        "caffe2/serialize/istream_adapter.cc",
        "caffe2/serialize/read_adapter_interface.cc",
    ],
)

# Shared utilities (proto wrapper, strings, thread pool).
filegroup(
    name = "caffe2_utils_srcs",
    srcs = [
        "caffe2/utils/proto_wrap.cc",
        "caffe2/utils/string_utils.cc",
        "caffe2/utils/threadpool/ThreadPool.cc",
        "caffe2/utils/threadpool/pthreadpool.cc",
        "caffe2/utils/threadpool/pthreadpool_impl.cc",
        "caffe2/utils/threadpool/thread_pool_guard.cpp",
    ],
)
490
# To achieve finer granularity and make debug easier, caffe2 is split into three libraries:
# ATen, caffe2 and caffe2_for_aten_headers. ATen lib group up source codes under
# aten/ directory and caffe2 contains most files under `caffe2/` directory. Since the
# ATen lib and the caffe2 lib would depend on each other, `caffe2_for_aten_headers` is split
# out from `caffe2` to avoid dependency cycle.
cc_library(
    name = "caffe2_for_aten_headers",
    hdrs = [
        "caffe2/core/common.h",
        "caffe2/perfkernels/common.h",
        "caffe2/perfkernels/embedding_lookup_idx.h",
        "caffe2/utils/fixed_divisor.h",
    ] + glob([
        "caffe2/utils/threadpool/*.h",
    ]),
    copts = CAFFE2_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_core_macros",
        "//c10",
    ],
)

# Full caffe2 header set; macros.h is excluded because it is generated
# (see :caffe2_core_macros).
cc_library(
    name = "caffe2_headers",
    hdrs = glob(
        [
            "caffe2/perfkernels/*.h",
            "caffe2/serialize/*.h",
            "caffe2/utils/*.h",
            "caffe2/utils/threadpool/*.h",
            "modules/**/*.h",
        ],
        exclude = [
            "caffe2/core/macros.h",
        ],
    ) + if_cuda(glob([
        "caffe2/**/*.cuh",
    ])),
    copts = CAFFE2_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_core_macros",
        ":caffe2_for_aten_headers",
    ],
)
537
# The caffe2 library proper; pulls in the CPU or CUDA flavor of ATen and
# tensorpipe depending on the build configuration.
cc_library(
    name = "caffe2",
    srcs = [
        ":caffe2_core_srcs",
        ":caffe2_perfkernels_srcs",
        ":caffe2_serialize_srcs",
        ":caffe2_utils_srcs",
    ],
    # -mf16c: perfkernels use F16C half-float conversion intrinsics.
    copts = CAFFE2_COPTS + ["-mf16c"],
    linkstatic = 1,
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_core_macros",
        ":caffe2_headers",
        ":caffe2_perfkernels_avx",
        ":caffe2_perfkernels_avx2",
        "//third_party/miniz-2.1.0:miniz",
        "@com_google_protobuf//:protobuf",
        "@eigen",
        "@fbgemm//:fbgemm_src_headers",
        "@fmt",
        "@onnx",
    ] + if_cuda(
        [
            ":aten_cuda",
            "@tensorpipe//:tensorpipe_cuda",
        ],
        [
            ":aten",
            "@tensorpipe//:tensorpipe_cpu",
        ],
    ),
    alwayslink = True,
)
572
# Device-compiled torch sources (distributed/c10d kernels) that are excluded
# from the host-compiled :torch srcs below.
cu_library(
    name = "torch_cuda",
    srcs = [
        "torch/csrc/distributed/c10d/intra_node_comm.cu",
        "torch/csrc/distributed/c10d/NanCheck.cu",
        "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
    ],
    copts = torch_cuda_half_options,
    visibility = ["//visibility:public"],
    deps = [
        ":aten",
        "@cuda//:cublas",
        "@cuda//:curand",
        "@cudnn",
        "@eigen",
        "@tensorpipe//:tensorpipe_cuda",
    ],
    alwayslink = True,
)
592
# Flags for the SIMD perfkernels translation units.
# NOTE(review): fixed "-DTH_BALS_MKL" -> "-DTH_BLAS_MKL" (typo; the macro
# selects the MKL BLAS backend and was never defined correctly as written).
PERF_COPTS = [
    "-DHAVE_AVX_CPU_DEFINITION",
    "-DHAVE_AVX2_CPU_DEFINITION",
    "-DENABLE_ALIAS=1",
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DHAVE_MMAP=1",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-DSLEEF_STATIC_LIBS=1",
    "-DTH_BLAS_MKL",
    "-D_FILE_OFFSET_BITS=64",
    "-DUSE_FBGEMM",
    "-fvisibility-inlines-hidden",
    "-Wunused-parameter",
    "-fno-math-errno",
    "-fno-trapping-math",
    "-mf16c",
]
611
# Headers shared by the AVX/AVX2 perfkernel libraries.
PERF_HEADERS = glob([
    "caffe2/perfkernels/*.h",
    "caffe2/core/*.h",
])

# AVX build of the perfkernels; dispatched to at runtime by CPU capability.
cc_library(
    name = "caffe2_perfkernels_avx",
    srcs = glob([
        "caffe2/perfkernels/*_avx.cc",
    ]),
    hdrs = PERF_HEADERS,
    copts = PERF_COPTS + [
        "-mavx",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_headers",
        "//c10",
    ],
    alwayslink = True,
)

# AVX2+FMA build of the perfkernels.
cc_library(
    name = "caffe2_perfkernels_avx2",
    srcs = glob([
        "caffe2/perfkernels/*_avx2.cc",
    ]),
    hdrs = PERF_HEADERS,
    copts = PERF_COPTS + [
        "-mavx2",
        "-mfma",
        "-mavx",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2_headers",
        "//c10",
    ],
    alwayslink = True,
)
652
# torch
torch_cuda_headers = glob(["torch/csrc/cuda/*.h"])

# All torch headers; CUDA headers are included only for CUDA builds and are
# therefore excluded from the generic glob to avoid duplicates.
cc_library(
    name = "torch_headers",
    hdrs = if_cuda(
        torch_cuda_headers,
    ) + glob(
        [
            "torch/*.h",
            "torch/csrc/**/*.h",
            "torch/csrc/distributed/c10d/**/*.hpp",
            "torch/lib/libshm/*.h",
        ],
        exclude = [
            "torch/csrc/*/generated/*.h",
        ] + torch_cuda_headers,
    ) + GENERATED_AUTOGRAD_CPP + [":version_h"],
    includes = [
        "third_party/kineto/libkineto/include",
        "torch/csrc",
        "torch/csrc/api/include",
        "torch/csrc/distributed",
        "torch/lib",
        "torch/lib/libshm",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":aten_headers",
        ":caffe2_headers",
        "//c10",
        "@com_github_google_flatbuffers//:flatbuffers",
        "@local_config_python//:python_headers",
        "@onnx",
    ],
    alwayslink = True,
)
690
# Flags for the torch library targets.
# NOTE(review): removed a stray trailing space inside "-fno-math-errno "
# for consistency with ATEN_COPTS/CAFFE2_COPTS; the padded form is passed
# as part of the flag token and defeats flag de-duplication/comparison.
TORCH_COPTS = COMMON_COPTS + [
    "-Dtorch_EXPORTS",
    "-DHAVE_AVX_CPU_DEFINITION",
    "-DHAVE_AVX2_CPU_DEFINITION",
    "-DCAFFE2_USE_GLOO",
    "-fvisibility-inlines-hidden",
    "-fno-math-errno",
    "-fno-trapping-math",
    "-Wno-error=unused-function",
]
701
# Deduplicate the combined source lists while preserving order: building a
# dict keyed by filename and taking .keys() drops repeats (Starlark dicts
# keep insertion order).
torch_sources = {
    k: ""
    for k in (
        libtorch_core_sources +
        libtorch_distributed_sources +
        torch_cpp_srcs +
        libtorch_extra_sources +
        jit_core_sources +
        lazy_tensor_ts_sources +
        GENERATED_AUTOGRAD_CPP
    )
}.keys()

# The main libtorch library.  NCCL python bindings and the .cu files (built
# separately in :torch_cuda) are excluded from the CUDA source glob.
cc_library(
    name = "torch",
    srcs = if_cuda(glob(
        libtorch_cuda_sources,
        exclude = [
            "torch/csrc/cuda/python_nccl.cpp",
            "torch/csrc/cuda/nccl.cpp",
            "torch/csrc/distributed/c10d/intra_node_comm.cu",
            "torch/csrc/distributed/c10d/CUDASymmetricMemory.cu",
            "torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu",
            "torch/csrc/distributed/c10d/NanCheck.cu",
            "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
        ],
    )) + torch_sources,
    copts = TORCH_COPTS,
    linkopts = [
        "-lrt",
    ],
    defines = [
        "CAFFE2_NIGHTLY_VERSION=20200115",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":caffe2",
        ":torch_headers",
        "@kineto",
        "@cpp-httplib",
        "@nlohmann",
    ] + if_cuda([
        "@cuda//:nvToolsExt",
        "@cutlass",
        ":torch_cuda",
    ]),
    alwayslink = True,
)
750
# Shared-memory manager client library (torch/lib/libshm).
cc_library(
    name = "shm",
    srcs = glob(["torch/lib/libshm/*.cpp"]),
    linkopts = [
        "-lrt",
    ],
    deps = [
        ":torch",
    ],
)

# Catch-all header target mirroring the layout extension modules expect.
cc_library(
    name = "libtorch_headers",
    hdrs = glob([
        "**/*.h",
        "**/*.cuh",
    ]) + [
        # We need the filegroup here because the raw list causes Bazel
        # to see duplicate files. It knows how to deduplicate with the
        # filegroup.
        ":cpp_generated_code",
    ],
    includes = [
        "torch/csrc/api/include",
        "torch/csrc/distributed",
        "torch/lib",
        "torch/lib/libshm",
    ],
    visibility = ["//visibility:public"],
    deps = [
        ":torch_headers",
    ],
)
784
# Python binding layer (torch/csrc python sources + generated autograd
# python bindings).  The generic/*.cpp files are textually included by other
# sources, hence listed as hdrs rather than srcs.
cc_library(
    name = "torch_python",
    srcs = libtorch_python_core_sources
        + if_cuda(libtorch_python_cuda_sources)
        + if_cuda(libtorch_python_distributed_sources)
        + GENERATED_AUTOGRAD_PYTHON,
    hdrs = glob([
        "torch/csrc/generic/*.cpp",
    ]),
    copts = COMMON_COPTS + if_cuda(["-DUSE_CUDA=1"]),
    deps = [
        ":torch",
        ":shm",
        "@pybind11",
    ],
)

# The torch._C extension module (thin C stub over :torch_python).
pybind_extension(
    name = "torch/_C",
    srcs = ["torch/csrc/stub.c"],
    deps = [
        ":torch_python",
        ":aten_nvrtc",
    ],
)
810
# First-class-dims implementation backing the functorch extension.
cc_library(
    name = "functorch",
    hdrs = glob([
        "functorch/csrc/dim/*.h",
    ]),
    srcs = glob([
        "functorch/csrc/dim/*.cpp",
    ]),
    deps = [
        ":aten_nvrtc",
        ":torch_python",
        "@pybind11",
    ],
)

# The functorch._C extension module; TORCH_EXTENSION_NAME must match the
# module's leaf name for the init function to resolve.
pybind_extension(
    name = "functorch/_C",
    copts = [
        "-DTORCH_EXTENSION_NAME=_C",
    ],
    srcs = [
        "functorch/csrc/init_dim_only.cpp",
    ],
    deps = [
        ":functorch",
        ":torch_python",
        ":aten_nvrtc",
    ],
)
840
# Standalone shared-memory manager daemon shipped with the wheel.
cc_binary(
    name = "torch/bin/torch_shm_manager",
    srcs = [
        "torch/lib/libshm/manager.cpp",
    ],
    deps = [
        ":shm",
    ],
    linkstatic = False,
)

# Produces torch/version.py with the build's version/CUDA strings.
template_rule(
    name = "gen_version_py",
    src = ":torch/version.py.tpl",
    out = "torch/version.py",
    substitutions = if_cuda({
        # Set default to 11.2. Otherwise Torchvision complains about incompatibility.
        "{{CUDA_VERSION}}": "11.2",
        "{{VERSION}}": "2.0.0",
    }, {
        "{{CUDA_VERSION}}": "None",
        "{{VERSION}}": "2.0.0",
    }),
)
865
# The importable torch/functorch Python packages; the checked-in
# torch/version.py is excluded in favor of the generated one.
py_library(
    name = "pytorch_py",
    visibility = ["//visibility:public"],
    srcs = glob(["torch/**/*.py"], exclude = ["torch/version.py"]) + [":torch/version.py"] + glob(["functorch/**/*.py"]),
    deps = [
        rules.requirement("numpy"),
        rules.requirement("pyyaml"),
        rules.requirement("requests"),
        rules.requirement("setuptools"),
        rules.requirement("sympy"),
        rules.requirement("typing_extensions"),
        "//torchgen",
    ],
    # Native extension modules and the shm manager binary must ship with
    # the Python package.
    data = [
        ":torch/_C.so",
        ":functorch/_C.so",
        ":torch/bin/torch_shm_manager",
    ],
)
885
# cpp api tests
# Shared fixtures/baselines for the C++ API tests.
cc_library(
    name = "test_support",
    testonly = True,
    srcs = [
        "test/cpp/api/support.cpp",
    ],
    hdrs = [
        "test/cpp/api/init_baseline.h",
        "test/cpp/api/optim_baseline.h",
        "test/cpp/api/support.h",
        "test/cpp/common/support.h",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)

# Torch integration tests rely on a labeled data set from the MNIST database.
# http://yann.lecun.com/exdb/mnist/

# One test binary per file; imethod/integration need special handling and
# are excluded from the per-file expansion below.
cpp_api_tests = glob(
    ["test/cpp/api/*.cpp"],
    exclude = [
        "test/cpp/api/imethod.cpp",
        "test/cpp/api/integration.cpp",
    ],
)
915
# End-to-end test that trains on the downloaded MNIST data; needs a GPU.
cc_test(
    name = "integration_test",
    size = "medium",
    srcs = ["test/cpp/api/integration.cpp"],
    data = [
        ":download_mnist",
    ],
    tags = [
        "gpu-required",
    ],
    deps = [
        ":test_support",
        "@com_google_googletest//:gtest_main",
    ],
)

# Expand one cc_test per C++ API test file, e.g. "any.cpp" -> "any_test".
[
    cc_test(
        name = paths.split_extension(paths.basename(filename))[0].replace("-", "_") + "_test",
        size = "medium",
        srcs = [filename],
        deps = [
            ":test_support",
            "@com_google_googletest//:gtest_main",
        ],
    )
    for filename in cpp_api_tests
]
944
# Suite collecting the generated C++ API tests plus integration_test.
test_suite(
    name = "api_tests",
    tests = [
        "any_test",
        "autograd_test",
        "dataloader_test",
        "enum_test",
        "expanding_array_test",
        "functional_test",
        "init_test",
        "integration_test",
        "jit_test",
        "memory_test",
        "misc_test",
        "module_test",
        "modulelist_test",
        "modules_test",
        "nn_utils_test",
        "optim_test",
        "ordered_dict_test",
        "rnn_test",
        "sequential_test",
        "serialize_test",
        "static_test",
        "tensor_options_test",
        "tensor_test",
        "torch_include_test",
    ],
)
974
# dist autograd tests
cc_test(
    name = "torch_dist_autograd_test",
    size = "small",
    srcs = ["test/cpp/dist_autograd/test_dist_autograd.cpp"],
    tags = [
        "exclusive",
        "gpu-required",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)

# jit tests
# Because these individual unit tests require custom registering,
# it is easier to mimic the cmake build by globing together a single test.
cc_test(
    name = "jit_tests",
    size = "small",
    srcs = glob(
        [
            "test/cpp/jit/*.cpp",
            "test/cpp/jit/*.h",
            "test/cpp/tensorexpr/*.cpp",
            "test/cpp/tensorexpr/*.h",
        ],
        exclude = [
            # skip this since <pybind11/embed.h> is not found in OSS build
            "test/cpp/jit/test_exception.cpp",
        ],
    ),
    linkstatic = True,
    tags = [
        "exclusive",
        "gpu-required",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)
1018
# Lazy-tensor tests (subset buildable without the generated LazyIr.h).
cc_test(
    name = "lazy_tests",
    size = "small",
    srcs = glob(
        [
            "test/cpp/lazy/*.cpp",
            "test/cpp/lazy/*.h",
        ],
        exclude = [
            # skip these since they depend on generated LazyIr.h which isn't available in bazel yet
            "test/cpp/lazy/test_ir.cpp",
            "test/cpp/lazy/test_lazy_ops.cpp",
            "test/cpp/lazy/test_lazy_ops_util.cpp",
        ],
    ),
    linkstatic = True,
    tags = [
        "exclusive",
    ],
    deps = [
        ":torch",
        "@com_google_googletest//:gtest_main",
    ],
)

# python api tests

# Smoke test that the Bazel-built torch package imports and runs.
py_test(
    name = "test_bazel",
    srcs = ["test/_test_bazel.py"],
    main = "test/_test_bazel.py",
    deps = [":pytorch_py"],
)
1052
# all tests
test_suite(
    name = "all_tests",
    tests = [
        "api_tests",
        "jit_tests",
        "torch_dist_autograd_test",
        "//c10/test:tests",
    ],
)

# An internal genrule that we are converging with refers to these file
# as if they are from this package, so we alias them for
# compatibility.

# e.g. "aten/src/ATen/templates/LazyIr.h" gets the alias name "LazyIr.h".
[
    alias(
        name = paths.basename(path),
        actual = path,
    )
    for path in [
        "aten/src/ATen/templates/DispatchKeyNativeFunctions.cpp",
        "aten/src/ATen/templates/DispatchKeyNativeFunctions.h",
        "aten/src/ATen/templates/LazyIr.h",
        "aten/src/ATen/templates/LazyNonNativeIr.h",
        "aten/src/ATen/templates/RegisterDispatchKey.cpp",
        "aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
        "aten/src/ATen/native/native_functions.yaml",
        "aten/src/ATen/native/tags.yaml",
        "aten/src/ATen/native/ts_native_functions.yaml",
        "torch/csrc/lazy/core/shape_inference.h",
        "torch/csrc/lazy/ts_backend/ts_native_functions.cpp",
    ]
]
1087
# Fetches the MNIST data set used by :integration_test.
# NOTE(review): the command now locates the script via $(location ...) instead
# of a hard-coded workspace-relative path, so it works regardless of the
# execution root layout.  Requires network access at build time.
genrule(
    name = "download_mnist",
    srcs = ["//:tools/download_mnist.py"],
    outs = [
        "mnist/train-images-idx3-ubyte",
        "mnist/train-labels-idx1-ubyte",
        "mnist/t10k-images-idx3-ubyte",
        "mnist/t10k-labels-idx1-ubyte",
    ],
    cmd = "python3 $(location //:tools/download_mnist.py) -d $(RULEDIR)/mnist",
)
1099