1
#include <c10/util/Optional.h>
4
#include <torch/csrc/python_headers.h>
11
#include <ATen/DLConvertor.h>
12
#include <ATen/ExpandUtils.h>
13
#include <ATen/LegacyVmapMode.h>
14
#include <ATen/LinalgBackend.h>
15
#include <ATen/Parallel.h>
16
#include <ATen/Utils.h>
17
#include <ATen/core/Vitals.h>
18
#include <ATen/dlpack.h>
19
#include <ATen/native/ConvUtils.h>
20
#include <ATen/native/ForeachUtils.h>
21
#include <c10/core/DispatchKeySet.h>
22
#include <c10/util/AbortHandler.h>
23
#include <c10/util/Backtrace.h>
24
#include <c10/util/Logging.h>
25
#include <c10/util/irange.h>
27
#include <pybind11/pybind11.h>
28
#include <pybind11/stl.h>
29
#include <torch/csrc/THConcat.h>
30
#include <torch/csrc/utils/pybind.h>
33
#include <unordered_map>
35
#include <ATen/ThreadLocalPythonObjects.h>
36
#include <torch/csrc/DataLoader.h>
37
#include <torch/csrc/Device.h>
38
#include <torch/csrc/Dtype.h>
39
#include <torch/csrc/DynamicTypes.h>
40
#include <torch/csrc/Generator.h>
41
#include <torch/csrc/Layout.h>
42
#include <torch/csrc/MemoryFormat.h>
43
#include <torch/csrc/QScheme.h>
44
#include <torch/csrc/Stream.h>
45
#include <torch/csrc/THP.h>
46
#include <torch/csrc/TypeInfo.h>
47
#include <torch/csrc/api/include/torch/python/init.h>
48
#include <torch/csrc/autograd/generated/python_return_types.h>
49
#include <torch/csrc/autograd/python_cpp_function.h>
50
#include <torch/csrc/autograd/python_enum_tag.h>
51
#include <torch/csrc/autograd/python_fft_functions.h>
52
#include <torch/csrc/autograd/python_function.h>
53
#include <torch/csrc/autograd/python_legacy_variable.h>
54
#include <torch/csrc/autograd/python_linalg_functions.h>
55
#include <torch/csrc/autograd/python_nested_functions.h>
56
#include <torch/csrc/autograd/python_nn_functions.h>
57
#include <torch/csrc/autograd/python_sparse_functions.h>
58
#include <torch/csrc/autograd/python_special_functions.h>
59
#include <torch/csrc/autograd/python_variable.h>
60
#include <torch/csrc/cpu/Module.h>
61
#include <torch/csrc/dynamo/init.h>
62
#include <torch/csrc/functorch/init.h>
63
#include <torch/csrc/inductor/aoti_runner/pybind.h>
64
#include <torch/csrc/jit/python/init.h>
65
#include <torch/csrc/jit/python/python_ir.h>
66
#include <torch/csrc/jit/python/python_tracer.h>
67
#include <torch/csrc/jit/serialization/pickler.h>
68
#include <torch/csrc/lazy/python/init.h>
69
#include <torch/csrc/monitor/python_init.h>
70
#include <torch/csrc/mps/Module.h>
71
#include <torch/csrc/multiprocessing/init.h>
72
#include <torch/csrc/onnx/init.h>
73
#include <torch/csrc/profiler/python/init.h>
74
#include <torch/csrc/tensor/python_tensor.h>
75
#include <torch/csrc/utils/disable_torch_function.h>
76
#include <torch/csrc/utils/init.h>
77
#include <torch/csrc/utils/pycfunction_helpers.h>
78
#include <torch/csrc/utils/python_arg_parser.h>
79
#include <torch/csrc/utils/python_compat.h>
80
#include <torch/csrc/utils/python_dispatch.h>
81
#include <torch/csrc/utils/python_strings.h>
82
#include <torch/csrc/utils/tensor_dtypes.h>
83
#include <torch/csrc/utils/tensor_layouts.h>
84
#include <torch/csrc/utils/tensor_memoryformats.h>
85
#include <torch/csrc/utils/tensor_new.h>
86
#include <torch/csrc/utils/tensor_numpy.h>
87
#include <torch/csrc/utils/tensor_qschemes.h>
88
#include <torch/csrc/utils/verbose.h>
90
#include <ATen/native/transformers/sdp_utils_cpp.h>
91
#include <torch/csrc/profiler/combined_traceback.h>
94
#include <ATen/native/transformers/cuda/sdp_utils.h>
99
#include <torch/csrc/distributed/autograd/python_autograd.h>
100
#include <torch/csrc/distributed/c10d/c10d.h>
101
#include <torch/csrc/distributed/rpc/rpc.h>
102
#include <torch/csrc/distributed/rpc/testing/testing.h>
106
#if defined(USE_VALGRIND)
107
#include <callgrind.h>
110
namespace py = pybind11;
114
THPGenerator* THPDefaultCPUGenerator = nullptr;
116
////////////////////////////////////////////////////////////////////////////////
117
////////////////////////////////////////////////////////////////////////////////
119
// Rewrites each type's tp_name to its fully-qualified "__module__.tp_name"
// form. The qualified strings are interned in a function-local static vector
// so the char* handed to tp_name stays alive for the process lifetime
// (tp_name is a borrowed pointer; CPython never copies it).
//
// arg: a Python sequence of type objects. Returns None on success.
static PyObject* THPModule_initNames(PyObject* self, PyObject* arg) {
  HANDLE_TH_ERRORS
  static std::vector<std::string> names;

  THPObjectPtr types(PySequence_Fast(arg, "expected a sequence"));
  if (!types)
    return nullptr;

  // NOLINTNEXTLINE(bugprone-branch-clone)
  auto num_classes = PySequence_Fast_GET_SIZE(types.get());
  // Reserve up front: emplace_back below must never reallocate between
  // storing a string and taking .c_str() pointers in earlier calls.
  names.reserve(names.size() + num_classes);
  for (Py_ssize_t i = 0; i < num_classes; i++) {
    PyObject* obj = PySequence_Fast_GET_ITEM(types.get(), i);
    TORCH_CHECK(PyType_Check(obj), "expected a PyTypeObject");
    PyTypeObject* type = (PyTypeObject*)obj;

    THPObjectPtr module_name(PyObject_GetAttrString(obj, "__module__"));
    if (!module_name)
      return nullptr;
    TORCH_CHECK(
        THPUtils_checkString(module_name.get()),
        "expected __module__ to be a string");
    std::string name = THPUtils_unpackString(module_name.get());
    names.emplace_back(name + "." + type->tp_name);
    // Point the type at the interned qualified name.
    type->tp_name = names.back().c_str();
  }
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}
149
// Callback for python part. Used for additional initialization of python
151
static PyObject* THPModule_initExtension(
153
PyObject* shm_manager_path) {
155
#if !defined(FBCODE_CAFFE2)
156
if (torch::get_cpp_stacktraces_enabled() && !torch::get_disable_addr2line()) {
157
c10::SetStackTraceFetcher([]() -> std::string {
158
auto tb = torch::CapturedTraceback::gather(false, false, true);
160
<< "symbolizing C++ stack trace for exception; if this hangs, rerun with TORCH_DISABLE_ADDR2LINE=1..."
162
auto s_tbs = torch::symbolize({tb.get()});
163
std::stringstream oss;
164
oss << "C++ CapturedTraceback:" << std::endl;
165
const auto& s_tb = s_tbs.tracebacks.at(0);
166
for (auto idx : c10::irange(s_tb.size())) {
167
// Skip the first few frames:
168
// #1 torch::CapturedTraceback::gather(bool, bool, bool)
169
// #2 THPModule_initExtension
170
// #3 THPModule_initExtension(_object*, _object*)::{lambda()#1}
174
auto frame_id = s_tb[idx];
175
const auto& frame = s_tbs.all_frames.at(frame_id);
176
oss << "#" << idx << " " << frame.funcname << " from " << frame.filename
177
<< ":" << frame.lineno << std::endl;
183
if (!THPUtils_checkString(shm_manager_path)) {
185
"initialization error - expected bytes/string object as shm_manager_path!");
188
torch::utils::initializeLayouts();
189
torch::utils::initializeMemoryFormats();
190
torch::utils::initializeQSchemes();
191
torch::utils::initializeDtypes();
192
torch::tensors::initialize_python_bindings();
193
std::string path = THPUtils_unpackString(shm_manager_path);
194
libshm_init(path.c_str());
196
auto module = THPObjectPtr(PyImport_ImportModule("torch"));
198
throw python_error();
200
THPStorage_postInit(module);
201
THPAutograd_initFunctions();
206
// The idea behind these two functions is to make it easy to test if we are
207
// built with ASAN: they're designed not to crash if ASAN is not enabled, but
208
// to trigger ASAN if it is enabled. This lets us run a "canary" tests which
209
// checks if our build environment is misconfigured.
211
static PyObject* THPModule_crashIfCsrcASAN(PyObject* module, PyObject* arg) {
214
THPUtils_checkLong(arg),
215
"crash_if_csrc_asan expects an int, but got ",
216
THPUtils_typename(arg));
217
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, modernize-avoid-c-arrays)
219
x[THPUtils_unpackInt(arg)] = 0;
220
// NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
221
return THPUtils_packInt32(x[0]);
225
static PyObject* THPModule_crashIfCsrcUBSAN(PyObject* module, PyObject* arg) {
228
THPUtils_checkLong(arg),
229
"crash_if_csrc_ubsan expects an int, but got ",
230
THPUtils_typename(arg));
231
int32_t x = THPUtils_unpackInt(arg);
233
return THPUtils_packInt32((int)y);
237
static PyObject* THPModule_crashIfvptrUBSAN(PyObject* module, PyObject* noarg) {
238
// This code should work perfectly fine, as vtables are identical for Foo and
239
// Baz unless rtti and ubsan are enabled
241
virtual int bar() = 0;
242
virtual ~Foo() = default;
248
virtual ~Baz() = default;
251
auto y = static_cast<Foo*>(static_cast<void*>(&x));
253
return THPUtils_packInt32(rc);
256
static PyObject* THPModule_crashIfATenASAN(PyObject* module, PyObject* arg) {
259
THPUtils_checkLong(arg),
260
"crash_if_aten_asan expects an int, "
262
THPUtils_typename(arg));
263
return THPUtils_packInt32(at::_crash_if_asan(THPUtils_unpackInt(arg)));
267
static PyObject* THPModule_abort(PyObject* module, PyObject* noargs) {
272
static PyObject* THPModule_crashIfDebugAssertsFail(
277
THPUtils_checkLong(arg),
278
"crash_if_debug_asserts_fail expects an int, but got ",
279
THPUtils_typename(arg));
280
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
281
THPUtils_unpackInt(arg) != 424242,
282
"Expect anything but 424242 as an input for debug builds");
283
return THPUtils_packInt32(0);
287
// Binding for torch.get_num_threads: returns ATen's intra-op thread count
// as a Python int.
static PyObject* THPModule_getNumThreads(PyObject* module, PyObject* noargs) {
  return THPUtils_packInt32(at::get_num_threads());
}
// Binding for torch.set_num_threads: sets ATen's intra-op thread count.
// Validates that the argument is a positive Python int; raises (via the
// TORCH_CHECK -> Python exception translation) otherwise. Returns None.
static PyObject* THPModule_setNumThreads(PyObject* module, PyObject* arg) {
  HANDLE_TH_ERRORS
  TORCH_CHECK(
      THPUtils_checkLong(arg),
      "set_num_threads expects an int, but got ",
      THPUtils_typename(arg));
  int nthreads = (int)THPUtils_unpackLong(arg);
  TORCH_CHECK(nthreads > 0, "set_num_threads expects a positive integer");
  at::set_num_threads(nthreads);
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}
static PyObject* THPModule_getNumInteropThreads(
307
return THPUtils_packInt32(at::get_num_interop_threads());
310
static PyObject* THPModule_setNumInteropThreads(
315
THPUtils_checkLong(arg),
316
"set_num_interop_threads expects an int, "
318
THPUtils_typename(arg));
319
int nthreads = (int)THPUtils_unpackLong(arg);
321
nthreads > 0, "set_num_interop_threads expects a positive integer");
322
at::set_num_interop_threads(nthreads);
327
PyObject* THPModule_setDefaultTensorType(PyObject* _unused, PyObject* type) {
329
torch::tensors::py_set_default_tensor_type(type);
334
PyObject* THPModule_setDefaultDtype(PyObject* _unused, PyObject* dtype) {
336
torch::tensors::py_set_default_dtype(dtype);
341
PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) {
343
PyObject* a_ = nullptr;
344
PyObject* b_ = nullptr;
345
if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) {
349
// Ensure we have Tensors
350
TORCH_CHECK(THPVariable_Check(a_));
351
TORCH_CHECK(THPVariable_Check(b_));
353
THPVariable* a = reinterpret_cast<THPVariable*>(a_);
354
THPVariable* b = reinterpret_cast<THPVariable*>(b_);
357
a->cdata->use_count() == 1,
358
"Expected single reference to a's Tensor object but got ",
359
a->cdata->use_count());
361
b->cdata->use_count() == 1,
362
"Expected single reference to b's Tensor object but got ",
363
b->cdata->use_count());
364
// weak_use_count() adds 1 if use_count is non-zero
366
a->cdata->weak_use_count() == 1,
367
"Expected no weakrefs to a's Tensor object but got ",
368
a->cdata->weak_use_count() - 1);
370
b->cdata->weak_use_count() == 1,
371
"Expected no weakrefs to b's Tensor object but got ",
372
b->cdata->weak_use_count() - 1);
374
// Swap the Tensor Impl
375
c10::MaybeOwned<at::Tensor> tmp = a->cdata;
377
// The TensorImpls contain PyObjectSlots that have a reference to the PyObject
378
// associated with the TensorImpl. Swap this field as well.
379
c10::optional<PyObject*> mb_obj_a =
380
a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
381
getPyInterpreter(), /*ignore_hermetic_tls=*/false);
382
c10::optional<PyObject*> mb_obj_b =
383
b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
384
getPyInterpreter(), /*ignore_hermetic_tls=*/false);
385
TORCH_INTERNAL_ASSERT(
386
mb_obj_a.has_value() && mb_obj_b.has_value(),
387
"Both tensors should have PyObjects tagged by the current python interpreter");
388
TORCH_CHECK(mb_obj_a.value() == a_);
389
TORCH_CHECK(mb_obj_b.value() == b_);
394
a->cdata->unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(
395
getPyInterpreter(), a_, c10::impl::PyInterpreterStatus::TAGGED_BY_US);
396
b->cdata->unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(
397
getPyInterpreter(), b_, c10::impl::PyInterpreterStatus::TAGGED_BY_US);
403
PyObject* THPModule_addDocStr(PyObject* _unused, PyObject* args) {
404
// adds a __doc__ string to a function, similar to numpy's arr_add_docstring
405
static std::vector<std::string> all_docs;
406
PyObject* obj = nullptr;
407
PyObject* doc_obj = nullptr;
408
if (!PyArg_ParseTuple(args, "OO", &obj, &doc_obj)) {
412
const char* doc_str = "<invalid string>";
413
if (THPUtils_checkString(doc_obj)) {
414
all_docs.push_back(THPUtils_unpackString(doc_obj));
415
doc_str = all_docs.back().c_str();
418
if (Py_TYPE(obj) == &PyCFunction_Type) {
419
PyCFunctionObject* f = (PyCFunctionObject*)obj;
420
if (f->m_ml->ml_doc) {
423
"function '%s' already has a docstring",
426
f->m_ml->ml_doc = doc_str;
427
} else if (strcmp(Py_TYPE(obj)->tp_name, "method_descriptor") == 0) {
428
PyMethodDescrObject* m = (PyMethodDescrObject*)obj;
429
if (m->d_method->ml_doc) {
432
"method '%s' already has a docstring",
433
m->d_method->ml_name);
435
m->d_method->ml_doc = doc_str;
436
} else if (strcmp(Py_TYPE(obj)->tp_name, "getset_descriptor") == 0) {
437
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
438
PyGetSetDescrObject* m = (PyGetSetDescrObject*)obj;
439
if (m->d_getset->doc) {
442
"attribute '%s' already has a docstring",
445
m->d_getset->doc = doc_str;
446
} else if (Py_TYPE(obj) == &PyType_Type) {
447
PyTypeObject* t = (PyTypeObject*)obj;
450
PyExc_RuntimeError, "Type '%s' already has a docstring", t->tp_name);
456
"don't know how to add docstring to type '%s'",
457
Py_TYPE(obj)->tp_name);
464
PyObject* THPModule_inferSize(PyObject* _unused, PyObject* args) {
466
Py_ssize_t num_args = args ? (Py_ssize_t)PyTuple_Size(args) : 0;
467
TORCH_CHECK(num_args == 2, "expected exactly 2 arguments");
468
PyObject* arg1 = PyTuple_GET_ITEM(args, 0);
469
TORCH_CHECK(THPSize_Check(arg1), "expected a torch.Size as argument 1");
470
PyObject* arg2 = PyTuple_GET_ITEM(args, 1);
471
TORCH_CHECK(THPSize_Check(arg2), "expected a torch.Size as argument 2");
473
auto size1 = THPUtils_unpackLongs(arg1);
474
auto size2 = THPUtils_unpackLongs(arg2);
475
auto sizes = at::infer_size(size1, size2);
476
return THPSize_NewFromSizes(static_cast<int64_t>(sizes.size()), sizes.data());
480
static PyObject* THPModule_setBackcompatBroadcastWarn(
486
"set_backcompat_broadcast_warn expects a bool, "
488
THPUtils_typename(arg));
489
setBackCompatBroadcastWarn(arg == Py_True);
494
static PyObject* THPModule_getBackcompatBroadcastWarn(
497
if (getBackCompatBroadcastWarn())
503
static PyObject* THPModule_setBackcompatKeepdimWarn(
509
"set_backcompat_keepdim_warn expects a bool, "
511
THPUtils_typename(arg));
512
setBackCompatKeepdimWarn(arg == Py_True);
517
static PyObject* THPModule_getBackcompatKeepdimWarn(
520
if (getBackCompatKeepdimWarn())
526
PyObject* THPModule_hasDistributed(PyObject* _unused, PyObject* noargs) {
527
#ifdef USE_DISTRIBUTED
534
static PyObject* THPModule_showConfig(PyObject* module, PyObject* noargs) {
536
return THPUtils_packString(at::show_config());
540
static PyObject* THPModule_cxxFlags(PyObject* module, PyObject* noargs) {
542
return THPUtils_packString(at::get_cxx_flags());
546
static PyObject* THPModule_parallelInfo(PyObject* module, PyObject* noargs) {
548
return THPUtils_packString(at::get_parallel_info());
552
static PyObject* THPModule_getCpuCapability(
556
return THPUtils_packString(at::get_cpu_capability());
560
void DLPack_Capsule_Destructor(PyObject* data) {
561
if (C10_LIKELY(!PyCapsule_IsValid(data, "dltensor"))) {
562
// early out, see DLPack spec: if a consuming library sets the capsule
563
// name to something else, they own it and we don't need to do anything
567
// Causes overheads for validity checks again, but this case is rare
568
// since consuming libraries should rename the capsule according to spec.
569
// Note that this cannot set a python error (we checked validity above),
570
// so we don't need to handle python error state here.
571
DLManagedTensor* dlMTensor =
572
(DLManagedTensor*)PyCapsule_GetPointer(data, "dltensor");
573
// the dlMTensor has not been consumed, call deleter ourselves.
574
// DLPack spec mentions that deleter may be NULL, but deleter from
575
// `at::toDLPack` is never NULL, so no need for an additional check here.
576
dlMTensor->deleter(dlMTensor);
577
END_HANDLE_TH_ERRORS_RET()
580
// Exports a Tensor as a DLPack capsule named "dltensor". Ownership of the
// DLManagedTensor transfers to the capsule; DLPack_Capsule_Destructor runs
// the deleter if no consumer ever claims (renames) the capsule.
PyObject* THPModule_toDLPack(PyObject* _unused, PyObject* data) {
  HANDLE_TH_ERRORS
  TORCH_CHECK(THPVariable_Check(data), "data must be a Tensor");
  DLManagedTensor* dlMTensor = at::toDLPack(THPVariable_Unpack(data));
  return PyCapsule_New(dlMTensor, "dltensor", DLPack_Capsule_Destructor);
  END_HANDLE_TH_ERRORS
}
// Imports a DLPack capsule ("dltensor") as a torch Tensor. The conversion
// helper consumes the capsule's DLManagedTensor per the DLPack protocol.
PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {
  using namespace torch::autograd;
  HANDLE_TH_ERRORS
  auto tensor = torch::utils::tensor_fromDLPack(data);
  return THPVariable_Wrap(tensor);
  END_HANDLE_TH_ERRORS
}
PyObject* THModule_getCppBacktrace(PyObject* _unused, PyObject* args) {
598
size_t frames_to_skip = 0;
599
size_t maximum_number_of_frames = 0;
600
if (!PyArg_ParseTuple(
601
args, "LL", &frames_to_skip, &maximum_number_of_frames)) {
604
return THPUtils_packString(
605
c10::get_backtrace(frames_to_skip, maximum_number_of_frames, true));
609
static PyObject* THModule_rename_privateuse1_backend(
614
THPUtils_checkString(arg),
615
"_rename_privateuse1_backend expects a str, but got ",
616
THPUtils_typename(arg));
617
const std::string backend_name = THPUtils_unpackString(arg);
618
c10::register_privateuse1_backend(backend_name);
623
static PyObject* THModule_get_privateuse1_backend_name(
627
return THPUtils_packString(c10::get_privateuse1_backend());
631
PyObject* THPModule_setAllowTF32CuDNN(PyObject* _unused, PyObject* arg) {
635
"set_allow_tf32_cublas expects a bool, "
637
THPUtils_typename(arg));
638
at::globalContext().setAllowTF32CuDNN(arg == Py_True);
643
PyObject* THPModule_allowTF32CuDNN(PyObject* _unused, PyObject* noargs) {
644
if (at::globalContext().allowTF32CuDNN())
650
PyObject* THPModule_setFloat32MatmulPrecision(
655
THPUtils_checkString(arg),
656
"set_float32_matmul_precision expects a str, "
658
THPUtils_typename(arg));
659
std::string s = THPUtils_unpackString(arg);
660
at::globalContext().setFloat32MatmulPrecision(s);
665
PyObject* THPModule_float32MatmulPrecision(
668
std::string s = "highest";
669
auto p = at::globalContext().float32MatmulPrecision();
670
if (p == at::Float32MatmulPrecision::HIGH) {
672
} else if (p == at::Float32MatmulPrecision::MEDIUM) {
675
return THPUtils_packString(s);
677
// Enables/disables the flash-attention SDP backend in the global context.
// Bug fix: the error message previously said "set_sdp_use_math" (copy-paste
// from the math setter); it now names this function correctly.
PyObject* THPModule_setSDPUseFlash(PyObject* _unused, PyObject* arg) {
  HANDLE_TH_ERRORS
  TORCH_CHECK(
      PyBool_Check(arg),
      "set_sdp_use_flash expects a bool, "
      "but got ",
      THPUtils_typename(arg));
  at::globalContext().setSDPUseFlash(arg == Py_True);
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}
PyObject* THPModule_userEnabledFlashSDP(PyObject* _unused, PyObject* noargs) {
689
if (at::globalContext().userEnabledFlashSDP())
694
// Enables/disables the memory-efficient SDP backend in the global context.
// Bug fix: the error message previously said "set_sdp_use_math" (copy-paste
// from the math setter); it now names this function correctly.
PyObject* THPModule_setSDPUseMemEfficient(PyObject* _unused, PyObject* arg) {
  HANDLE_TH_ERRORS
  TORCH_CHECK(
      PyBool_Check(arg),
      "set_sdp_use_mem_efficient expects a bool, "
      "but got ",
      THPUtils_typename(arg));
  at::globalContext().setSDPUseMemEfficient(arg == Py_True);
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}
PyObject* userEnabledMemEfficientSDP(PyObject* _unused, PyObject* noargs) {
706
if (at::globalContext().userEnabledMemEfficientSDP())
711
PyObject* THPModule_setSDPUseMath(PyObject* _unused, PyObject* arg) {
715
"set_sdp_use_math expects a bool, "
717
THPUtils_typename(arg));
718
at::globalContext().setSDPUseMath(arg == Py_True);
722
PyObject* THPModule_userEnabledMathSDP(PyObject* _unused, PyObject* noargs) {
723
if (at::globalContext().userEnabledMathSDP())
728
PyObject* THPModule_setSDPUseCuDNN(PyObject* _unused, PyObject* arg) {
732
"set_sdp_use_cudnn expects a bool, "
734
THPUtils_typename(arg));
735
at::globalContext().setSDPUseCuDNN(arg == Py_True);
739
PyObject* THPModule_userEnabledCuDNNSDP(PyObject* _unused, PyObject* noargs) {
740
if (at::globalContext().userEnabledCuDNNSDP())
746
PyObject* THPModule_setUserEnabledCuDNN(PyObject* _unused, PyObject* arg) {
750
"set_enabled_cudnn expects a bool, "
752
THPUtils_typename(arg));
753
at::globalContext().setUserEnabledCuDNN(arg == Py_True);
758
PyObject* THPModule_userEnabledCuDNN(PyObject* _unused, PyObject* noargs) {
759
if (at::globalContext().userEnabledCuDNN())
765
PyObject* THPModule_setUserEnabledMkldnn(PyObject* _unused, PyObject* arg) {
769
"set_enabled_mkldnn expects a bool, "
771
THPUtils_typename(arg));
772
at::globalContext().setUserEnabledMkldnn(arg == Py_True);
777
PyObject* THPModule_userEnabledMkldnn(PyObject* _unused, PyObject* noargs) {
778
if (at::globalContext().userEnabledMkldnn())
784
PyObject* THPModule_setDeterministicCuDNN(PyObject* _unused, PyObject* arg) {
788
"set_deterministic_cudnn expects a bool, "
790
THPUtils_typename(arg));
791
at::globalContext().setDeterministicCuDNN(arg == Py_True);
796
PyObject* THPModule_deterministicCuDNN(PyObject* _unused, PyObject* noargs) {
797
if (at::globalContext().deterministicCuDNN())
803
PyObject* THPModule_setDeterministicAlgorithms(
808
static torch::PythonArgParser parser(
809
{"_set_deterministic_algorithms(bool mode, *, bool warn_only=False)"});
810
torch::ParsedArgs<2> parsed_args{};
811
auto r = parser.parse(args, kwargs, parsed_args);
812
bool mode = r.toBool(0);
813
bool warn_only = r.toBool(1);
814
at::globalContext().setDeterministicAlgorithms(mode, warn_only);
819
PyObject* THPModule_deterministicAlgorithms(
822
if (at::globalContext().deterministicAlgorithms()) {
828
PyObject* THPModule_deterministicAlgorithmsWarnOnly(
831
if (at::globalContext().deterministicAlgorithmsWarnOnly()) {
837
PyObject* THPModule_setDeterministicFillUninitializedMemory(
842
PyBool_Check(arg), "expected a bool, but got ", THPUtils_typename(arg));
843
at::globalContext().setDeterministicFillUninitializedMemory(arg == Py_True);
848
PyObject* THPModule_deterministicFillUninitializedMemory(
851
if (at::globalContext().deterministicFillUninitializedMemory())
857
PyObject* THPModule_setUserEnabledNNPACK(PyObject* _unused, PyObject* arg) {
861
"set_enabled_NNPACK expects a bool, "
863
THPUtils_typename(arg));
864
at::globalContext().setUserEnabledNNPACK(arg == Py_True);
869
PyObject* THPModule_userEnabledNNPACK(PyObject* _unused, PyObject* noargs) {
870
if (at::globalContext().userEnabledNNPACK())
876
PyObject* THPModule_setWarnAlways(PyObject* _unused, PyObject* arg) {
880
"setWarnOnlyOnce expects a bool, "
882
THPUtils_typename(arg));
883
c10::WarningUtils::set_warnAlways(arg == Py_True);
888
PyObject* THPModule_warnAlways(PyObject* _unused, PyObject* noargs) {
889
if (c10::WarningUtils::get_warnAlways()) {
895
// Used only for testing C++ to Python warning translations.
896
PyObject* THPModule_warn(PyObject* _unused, PyObject* noargs) {
898
TORCH_WARN("Test message for TORCH_WARN");
903
// Used only for testing C++ to Python warning translations.
904
PyObject* THPModule_warnDeprecation(PyObject* _unused, PyObject* noargs) {
906
TORCH_WARN_DEPRECATION("Test message for TORCH_WARN_DEPRECATION");
911
PyObject* THPModule_setBenchmarkCuDNN(PyObject* _unused, PyObject* arg) {
915
"set_benchmark_cudnn expects a bool, "
917
THPUtils_typename(arg));
918
at::globalContext().setBenchmarkCuDNN(arg == Py_True);
923
PyObject* THPModule_benchmarkCuDNN(PyObject* _unused, PyObject* noargs) {
924
if (at::globalContext().benchmarkCuDNN()) {
930
PyObject* THPModule_setAllowTF32CuBLAS(PyObject* _unused, PyObject* arg) {
934
"set_allow_tf32_cublas expects a bool, "
936
THPUtils_typename(arg));
937
at::globalContext().setAllowTF32CuBLAS(arg == Py_True);
942
PyObject* THPModule_allowTF32CuBLAS(PyObject* _unused, PyObject* noargs) {
943
if (at::globalContext().allowTF32CuBLAS()) {
949
PyObject* THPModule_setAllowFP16ReductionCuBLAS(
955
"set_allow_fp16_reduction_cublas expects a bool, "
957
THPUtils_typename(arg));
958
at::globalContext().setAllowFP16ReductionCuBLAS(arg == Py_True);
963
PyObject* THPModule_allowFP16ReductionCuBLAS(
966
if (at::globalContext().allowFP16ReductionCuBLAS()) {
972
PyObject* THPModule_setAllowBF16ReductionCuBLAS(
978
"set_allow_bf16_reduction_cublas expects a bool, "
980
THPUtils_typename(arg));
981
at::globalContext().setAllowBF16ReductionCuBLAS(arg == Py_True);
986
PyObject* THPModule_allowBF16ReductionCuBLAS(
989
if (at::globalContext().allowBF16ReductionCuBLAS()) {
995
PyObject* THPModule_setFlushDenormal(PyObject* _unused, PyObject* arg) {
999
"flush_denormal expects a bool, "
1001
THPUtils_typename(arg));
1002
if (!at::globalContext().setFlushDenormal(arg == Py_True)) {
1006
END_HANDLE_TH_ERRORS
1009
PyObject* THPModule_getDefaultDtype(PyObject* _unused, PyObject* arg) {
1011
auto scalar_type = torch::tensors::get_default_scalar_type();
1012
auto dtype = (PyObject*)torch::getTHPDtype(scalar_type);
1015
END_HANDLE_TH_ERRORS
1018
PyObject* THPModule_getDefaultDevice(PyObject* _unused, PyObject* arg) {
1020
return THPUtils_packString(c10::DeviceTypeName(
1021
dispatchKeyToDeviceType(torch::tensors::get_default_dispatch_key()),
1022
/*lower_case=*/true));
1023
END_HANDLE_TH_ERRORS
1026
PyObject* THPModule_setQEngine(PyObject* /* unused */, PyObject* arg) {
1029
THPUtils_checkLong(arg),
1030
"set_qengine expects an int, "
1032
THPUtils_typename(arg));
1033
auto qengine = THPUtils_unpackLong(arg);
1034
at::globalContext().setQEngine(static_cast<at::QEngine>(qengine));
1036
END_HANDLE_TH_ERRORS
1039
PyObject* THPModule_qEngine(PyObject* _unused, PyObject* noargs) {
1040
return THPUtils_packInt64(
1041
static_cast<int64_t>(at::globalContext().qEngine()));
1044
PyObject* THPModule_supportedQEngines(PyObject* _unused, PyObject* noargs) {
1045
auto qengines = at::globalContext().supportedQEngines();
1047
THPObjectPtr(PyList_New(static_cast<Py_ssize_t>(qengines.size())));
1050
for (const auto i : c10::irange(qengines.size())) {
1051
PyObject* i64 = THPUtils_packInt64(static_cast<int64_t>(qengines[i]));
1054
PyList_SET_ITEM(list.get(), i, i64);
1056
return list.release();
1059
PyObject* THPModule_isEnabledXNNPACK(PyObject* _unused, PyObject* noargs) {
1060
if (at::globalContext().isXNNPACKAvailable())
1066
PyObject* THPModule_setCheckSparseTensorInvariants(
1072
"set_check_sparse_tensor_invariants expects a bool, "
1074
THPUtils_typename(arg));
1075
at::globalContext().setCheckSparseTensorInvariants(arg == Py_True);
1077
END_HANDLE_TH_ERRORS
1080
PyObject* THPModule_checkSparseTensorInvariants(
1083
if (at::globalContext().checkSparseTensorInvariants())
1089
PyObject* THPModule_willEngineExecuteNode(PyObject* _unused, PyObject* arg) {
1091
bool isTHPFunction = THPFunction_Check(arg);
1092
bool isTHPCppFunction = torch::autograd::THPCppFunction_Check(arg);
1094
isTHPFunction || isTHPCppFunction,
1095
"_will_engine_execute_node expects an grad_fn, "
1097
THPUtils_typename(arg));
1098
const auto exec_info = torch::autograd::get_current_graph_task_exec_info();
1101
"_get_should_execute_nodes should only be called during the backward pass");
1102
torch::autograd::Node* node = nullptr;
1103
std::shared_ptr<torch::autograd::Node> node_sp;
1104
if (isTHPFunction) {
1105
node_sp = ((THPFunction*)arg)->cdata.lock();
1106
node = node_sp.get();
1108
node = ((torch::autograd::THPCppFunction*)arg)->cdata.get();
1110
const auto nodes_in_graph =
1111
torch::autograd::get_current_graph_task_nodes_in_graph();
1112
bool ret = nodes_in_graph->find(node) != nodes_in_graph->end();
1113
if (ret && !exec_info->empty()) {
1114
auto it = exec_info->find(node);
1115
if (it == exec_info->end() || !it->second.should_execute()) {
1119
!(node->topological_nr() == 0 && it->second.captures_),
1120
"A leaf node was passed to _will_engine_execute_node but we are "
1121
"currently running autograd.grad(). This is currently not supported.");
1129
END_HANDLE_TH_ERRORS
1132
PyObject* THPModule_getCurrentGraphTaskExecutionOrder(
1136
std::vector<torch::autograd::Node*> nodes =
1137
torch::autograd::get_current_graph_task_execution_order();
1140
"_current_graph_task_execution_order should only be called during the backward pass");
1141
auto list = THPObjectPtr(PyList_New(static_cast<Py_ssize_t>(nodes.size())));
1144
for (const auto i : c10::irange(nodes.size())) {
1145
// This node is guaranteed to be alive since the backward is still running
1146
PyObject* pyobj_node =
1147
torch::autograd::functionToPyObject(nodes[i]->getptr());
1148
PyList_SET_ITEM(list.get(), i, pyobj_node);
1150
return list.release();
1151
END_HANDLE_TH_ERRORS
1154
PyObject* THPModule_getCurrentGraphTaskId(PyObject* _unused, PyObject* noargs) {
1156
return THPUtils_packInt64(torch::autograd::get_current_graph_task_id());
1157
END_HANDLE_TH_ERRORS
1160
PyObject* THPModule_getCurrentNode(PyObject* _unused, PyObject* noargs) {
1162
return torch::autograd::functionToPyObject(
1163
torch::autograd::get_current_node());
1164
END_HANDLE_TH_ERRORS
1167
PyObject* THPModule_setDefaultMobileCPUAllocator(
1171
at::globalContext().setDefaultMobileCPUAllocator();
1173
END_HANDLE_TH_ERRORS
1176
PyObject* THPModule_unsetDefaultMobileCPUAllocator(
1180
at::globalContext().unsetDefaultMobileCPUAllocator();
1182
END_HANDLE_TH_ERRORS
1185
static PyObject* THPModule_vmapmode_increment_nesting(
1189
return THPUtils_packInt64(at::impl::VmapMode::increment_nesting());
1190
END_HANDLE_TH_ERRORS
1193
static PyObject* THPModule_vmapmode_decrement_nesting(
1197
return THPUtils_packInt64(at::impl::VmapMode::decrement_nesting());
1198
END_HANDLE_TH_ERRORS
1201
static PyObject* THPModule_set_display_vmap_fallback_warnings_mode(
1207
"enabled must be a bool, "
1209
THPUtils_typename(arg));
1210
at::globalContext().setDisplayVmapFallbackWarnings(arg == Py_True);
1212
END_HANDLE_TH_ERRORS
1215
static PyObject* THPModule_are_vmap_fallback_warnings_enabled(
1219
if (at::globalContext().areVmapFallbackWarningsEnabled()) {
1224
END_HANDLE_TH_ERRORS
1227
// Method table for the torch._C extension module: maps Python-level names to
// the C implementations defined above in this file.
// NOTE(review): reconstructed from a garbled dump. The entry names and
// function pointers are exactly as they survived; the METH_* flag and
// trailing `nullptr},` of every multi-line entry were dropped lines and have
// been inferred (one-argument setters -> METH_O, zero-argument getters ->
// METH_NOARGS, `_has_torch_function_variadic` -> METH_FASTCALL). Verify the
// inferred flags against upstream torch/csrc/Module.cpp.
static PyMethodDef TorchMethods[] = { // NOLINT
    {"_initExtension", THPModule_initExtension, METH_O, nullptr},
    {"_autograd_init", THPAutograd_initExtension, METH_NOARGS, nullptr},
    {"_add_docstr", THPModule_addDocStr, METH_VARARGS, nullptr},
    {"_swap_tensor_impl", THPModule_swap_tensor_impl, METH_VARARGS, nullptr},
    {"_init_names", THPModule_initNames, METH_O, nullptr},
    {"_has_distributed", THPModule_hasDistributed, METH_NOARGS, nullptr},
    {"_set_default_tensor_type",
     THPModule_setDefaultTensorType,
     METH_O,
     nullptr},
    {"_set_default_dtype", THPModule_setDefaultDtype, METH_O, nullptr},
    {"_infer_size", THPModule_inferSize, METH_VARARGS, nullptr},
    {"_abort", THPModule_abort, METH_NOARGS, nullptr},
    {"_crash_if_csrc_asan", THPModule_crashIfCsrcASAN, METH_O, nullptr},
    {"_crash_if_csrc_ubsan", THPModule_crashIfCsrcUBSAN, METH_O, nullptr},
    {"_crash_if_vptr_ubsan", THPModule_crashIfvptrUBSAN, METH_NOARGS, nullptr},
    {"_crash_if_aten_asan", THPModule_crashIfATenASAN, METH_O, nullptr},
    {"_crash_if_debug_asserts_fail",
     THPModule_crashIfDebugAssertsFail,
     METH_O,
     nullptr},
    {"_show_config", THPModule_showConfig, METH_NOARGS, nullptr},
    {"_cxx_flags", THPModule_cxxFlags, METH_NOARGS, nullptr},
    {"_parallel_info", THPModule_parallelInfo, METH_NOARGS, nullptr},
    {"_get_cpu_capability", THPModule_getCpuCapability, METH_NOARGS, nullptr},
    {"_set_backcompat_broadcast_warn",
     THPModule_setBackcompatBroadcastWarn,
     METH_O,
     nullptr},
    {"_get_backcompat_broadcast_warn",
     THPModule_getBackcompatBroadcastWarn,
     METH_NOARGS,
     nullptr},
    {"_set_backcompat_keepdim_warn",
     THPModule_setBackcompatKeepdimWarn,
     METH_O,
     nullptr},
    {"_get_backcompat_keepdim_warn",
     THPModule_getBackcompatKeepdimWarn,
     METH_NOARGS,
     nullptr},
    {"get_num_threads", THPModule_getNumThreads, METH_NOARGS, nullptr},
    {"set_num_threads", THPModule_setNumThreads, METH_O, nullptr},
    {"get_num_interop_threads",
     THPModule_getNumInteropThreads,
     METH_NOARGS,
     nullptr},
    {"set_num_interop_threads",
     THPModule_setNumInteropThreads,
     METH_O,
     nullptr},
    {"_get_flash_sdp_enabled",
     THPModule_userEnabledFlashSDP,
     METH_NOARGS,
     nullptr},
    {"_set_sdp_use_flash", THPModule_setSDPUseFlash, METH_O, nullptr},
    {"_get_mem_efficient_sdp_enabled",
     userEnabledMemEfficientSDP,
     METH_NOARGS,
     nullptr},
    {"_set_sdp_use_mem_efficient",
     THPModule_setSDPUseMemEfficient,
     METH_O,
     nullptr},
    {"_get_math_sdp_enabled",
     THPModule_userEnabledMathSDP,
     METH_NOARGS,
     nullptr},
    {"_set_sdp_use_math", THPModule_setSDPUseMath, METH_O, nullptr},
    {"_get_cudnn_sdp_enabled",
     THPModule_userEnabledCuDNNSDP,
     METH_NOARGS,
     nullptr},
    {"_set_sdp_use_cudnn", THPModule_setSDPUseCuDNN, METH_O, nullptr},
    {"_get_cudnn_enabled", THPModule_userEnabledCuDNN, METH_NOARGS, nullptr},
    {"_set_cudnn_enabled", THPModule_setUserEnabledCuDNN, METH_O, nullptr},
    {"_get_mkldnn_enabled", THPModule_userEnabledMkldnn, METH_NOARGS, nullptr},
    {"_set_mkldnn_enabled", THPModule_setUserEnabledMkldnn, METH_O, nullptr},
    {"_get_cudnn_allow_tf32", THPModule_allowTF32CuDNN, METH_NOARGS, nullptr},
    {"_set_cudnn_allow_tf32", THPModule_setAllowTF32CuDNN, METH_O, nullptr},
    {"_get_cudnn_benchmark", THPModule_benchmarkCuDNN, METH_NOARGS, nullptr},
    {"_set_cudnn_benchmark", THPModule_setBenchmarkCuDNN, METH_O, nullptr},
    {"_get_cudnn_deterministic",
     THPModule_deterministicCuDNN,
     METH_NOARGS,
     nullptr},
    {"_set_cudnn_deterministic",
     THPModule_setDeterministicCuDNN,
     METH_O,
     nullptr},
    {"_get_deterministic_algorithms",
     THPModule_deterministicAlgorithms,
     METH_NOARGS,
     nullptr},
    {"_get_deterministic_algorithms_warn_only",
     THPModule_deterministicAlgorithmsWarnOnly,
     METH_NOARGS,
     nullptr},
    {"_set_deterministic_algorithms",
     castPyCFunctionWithKeywords(THPModule_setDeterministicAlgorithms),
     METH_VARARGS | METH_KEYWORDS,
     nullptr},
    {"_get_deterministic_fill_uninitialized_memory",
     THPModule_deterministicFillUninitializedMemory,
     METH_NOARGS,
     nullptr},
    {"_set_deterministic_fill_uninitialized_memory",
     THPModule_setDeterministicFillUninitializedMemory,
     METH_O,
     nullptr},
    {"_get_nnpack_enabled", THPModule_userEnabledNNPACK, METH_NOARGS, nullptr},
    {"_set_nnpack_enabled", THPModule_setUserEnabledNNPACK, METH_O, nullptr},
    {"_get_warnAlways", THPModule_warnAlways, METH_NOARGS, nullptr},
    {"_set_warnAlways", THPModule_setWarnAlways, METH_O, nullptr},
    {"_warn", THPModule_warn, METH_NOARGS, nullptr},
    {"_warn_deprecation", THPModule_warnDeprecation, METH_NOARGS, nullptr},
    {"_get_cublas_allow_tf32", THPModule_allowTF32CuBLAS, METH_NOARGS, nullptr},
    {"_set_cublas_allow_tf32", THPModule_setAllowTF32CuBLAS, METH_O, nullptr},
    {"_get_float32_matmul_precision",
     THPModule_float32MatmulPrecision,
     METH_NOARGS,
     nullptr},
    {"_set_float32_matmul_precision",
     THPModule_setFloat32MatmulPrecision,
     METH_O,
     nullptr},
    {"_get_cublas_allow_fp16_reduced_precision_reduction",
     THPModule_allowFP16ReductionCuBLAS,
     METH_NOARGS,
     nullptr},
    {"_set_cublas_allow_fp16_reduced_precision_reduction",
     THPModule_setAllowFP16ReductionCuBLAS,
     METH_O,
     nullptr},
    {"_get_cublas_allow_bf16_reduced_precision_reduction",
     THPModule_allowBF16ReductionCuBLAS,
     METH_NOARGS,
     nullptr},
    {"_set_cublas_allow_bf16_reduced_precision_reduction",
     THPModule_setAllowBF16ReductionCuBLAS,
     METH_O,
     nullptr},
    {"_vmapmode_increment_nesting",
     THPModule_vmapmode_increment_nesting,
     METH_NOARGS,
     nullptr},
    {"_vmapmode_decrement_nesting",
     THPModule_vmapmode_decrement_nesting,
     METH_NOARGS,
     nullptr},
    {"_debug_only_display_vmap_fallback_warnings",
     THPModule_set_display_vmap_fallback_warnings_mode,
     METH_O,
     nullptr},
    {"_debug_only_are_vmap_fallback_warnings_enabled",
     THPModule_are_vmap_fallback_warnings_enabled,
     METH_NOARGS,
     nullptr},
    {"_to_dlpack", THPModule_toDLPack, METH_O, nullptr},
    {"_from_dlpack", THPModule_fromDLPack, METH_O, nullptr},
    {"_get_cpp_backtrace", THModule_getCppBacktrace, METH_VARARGS, nullptr},
    {"_rename_privateuse1_backend",
     THModule_rename_privateuse1_backend,
     METH_O,
     nullptr},
    {"_get_privateuse1_backend_name",
     THModule_get_privateuse1_backend_name,
     METH_NOARGS,
     nullptr},
    {"set_flush_denormal", THPModule_setFlushDenormal, METH_O, nullptr},
    {"get_default_dtype", THPModule_getDefaultDtype, METH_NOARGS, nullptr},
    {"_get_default_device", THPModule_getDefaultDevice, METH_NOARGS, nullptr},
    {"_get_qengine", THPModule_qEngine, METH_NOARGS, nullptr},
    {"_set_qengine", THPModule_setQEngine, METH_O, nullptr},
    {"_supported_qengines", THPModule_supportedQEngines, METH_NOARGS, nullptr},
    {"_is_xnnpack_enabled", THPModule_isEnabledXNNPACK, METH_NOARGS, nullptr},
    {"_set_check_sparse_tensor_invariants",
     THPModule_setCheckSparseTensorInvariants,
     METH_O,
     nullptr},
    {"_check_sparse_tensor_invariants",
     THPModule_checkSparseTensorInvariants,
     METH_NOARGS,
     nullptr},
    {"_will_engine_execute_node",
     THPModule_willEngineExecuteNode,
     METH_O,
     nullptr},
    {"_current_graph_task_execution_order",
     THPModule_getCurrentGraphTaskExecutionOrder,
     METH_NOARGS,
     nullptr},
    {"_current_graph_task_id",
     THPModule_getCurrentGraphTaskId,
     METH_NOARGS,
     nullptr},
    {"_current_autograd_node", THPModule_getCurrentNode, METH_NOARGS, nullptr},
    {"_set_default_mobile_cpu_allocator",
     THPModule_setDefaultMobileCPUAllocator,
     METH_NOARGS,
     nullptr},
    {"_unset_default_mobile_cpu_allocator",
     THPModule_unsetDefaultMobileCPUAllocator,
     METH_NOARGS,
     nullptr},
    {"_is_torch_function_enabled",
     THPModule_isEnabledTorchFunction,
     METH_NOARGS,
     nullptr},
    {"_disabled_torch_function_impl",
     THPModule_disable_torch_function,
     METH_VARARGS,
     nullptr},
    {"_disabled_torch_dispatch_impl",
     THPModule_disable_torch_dispatch,
     METH_VARARGS,
     nullptr},
    {"_has_torch_function", THPModule_has_torch_function, METH_O, nullptr},
    {"_has_torch_function_unary",
     THPModule_has_torch_function_unary,
     METH_O,
     nullptr},
    {"_has_torch_function_variadic",
     (PyCFunction)(void (*)())THPModule_has_torch_function_variadic,
     METH_FASTCALL,
     nullptr},
    {nullptr, nullptr, 0, nullptr}};
void THCPStream_init(PyObject* module);
1457
void THCPEvent_init(PyObject* module);
1458
void THCPGraph_init(PyObject* module);
1461
PyMethodDef* THCPModule_methods();
1462
namespace torch::cuda {
1463
void initModule(PyObject* module);
1464
} // namespace torch::cuda
1468
PyMethodDef* THXPModule_methods();
1469
void THXPStream_init(PyObject* module);
1470
void THXPEvent_init(PyObject* module);
1471
namespace torch::xpu {
1472
void initModule(PyObject* module);
1473
} // namespace torch::xpu
1477
namespace torch::profiler {
1478
void initIttBindings(PyObject* module);
1479
} // namespace torch::profiler
1482
static std::vector<PyMethodDef> methods;
1484
// In Python we can't use the trick of C10_LOG_API_USAGE_ONCE
1485
// Guaranteed to be invoked from Python under GIL, no locking on map needed
1486
static void LogAPIUsageOnceFromPython(const std::string& event) {
1487
static std::unordered_set<std::string> seen;
1488
if (!seen.count(event)) {
1490
c10::LogAPIUsage(event);
1494
static void LogAPIUsageMetadataFromPython(
1495
const std::string& event,
1496
const std::map<std::string, std::string>& metadata_map) {
1497
c10::LogAPIUsageMetadata(event, metadata_map);
1500
// Weak reference to tensor, used to test a tensor isn't leaked
1501
class WeakTensorRef {
1502
c10::weak_intrusive_ptr<c10::TensorImpl> weakref_;
1505
WeakTensorRef(const at::Tensor& t) : weakref_(t.getIntrusivePtr()) {}
1508
return weakref_.expired();
1512
// Entry point exported to CPython's extension loader.
extern "C" C10_EXPORT PyObject* initModule();
// separate decl and defn for msvc error C2491
// NOTE(review): The body of initModule() below is preserved byte-for-byte
// from a corrupted dump. The bare numeric lines (e.g. "1518") are residue of
// the extraction tool (they are the original line numbers of the code line
// that follows), and the numbering gaps show that many original lines were
// dropped: the ASSERT_TRUE macro body, #ifdef USE_CUDA/USE_XPU/USE_MPS/
// USE_DISTRIBUTED/#endif guards, several py_module.def(...) openers, lambda
// bodies and closing braces. Only comments have been added here — restore
// the missing lines from the upstream torch/csrc/Module.cpp before building.
PyObject* initModule() {
1518
c10::set_terminate_handler();
1519
at::internal::lazy_init_num_threads();
1521
C10_LOG_API_USAGE_ONCE("torch.python.import");
1523
#define ASSERT_TRUE(cmd) \
1527
// -- Assemble the torch._C method table from each subsystem. ----------------
THPUtils_addPyMethodDefs(methods, TorchMethods);
1528
THPUtils_addPyMethodDefs(methods, DataLoaderMethods);
1529
THPUtils_addPyMethodDefs(methods, torch::autograd::python_functions());
1530
THPUtils_addPyMethodDefs(methods, torch::multiprocessing::python_functions());
1531
THPUtils_addPyMethodDefs(methods, torch::mps::python_functions());
1533
THPUtils_addPyMethodDefs(methods, THCPModule_methods());
1536
THPUtils_addPyMethodDefs(methods, THXPModule_methods());
1538
#if defined(USE_DISTRIBUTED) && defined(USE_C10D)
1539
THPUtils_addPyMethodDefs(
1540
methods, torch::distributed::c10d::python_functions());
1542
THPUtils_addPyMethodDefs(
1543
methods, torch::distributed::rpc::python_functions());
1544
THPUtils_addPyMethodDefs(
1545
methods, torch::distributed::autograd::python_functions());
1546
THPUtils_addPyMethodDefs(
1547
methods, torch::distributed::rpc::testing::python_functions());
1551
// -- Create the torch._C module object itself. ------------------------------
static struct PyModuleDef torchmodule = {
1552
PyModuleDef_HEAD_INIT, "torch._C", nullptr, -1, methods.data()};
1553
module = PyModule_Create(&torchmodule);
1554
ASSERT_TRUE(module);
1555
// -- Register core Python types (Generator, Size, Dtype, Layout, ...). ------
ASSERT_TRUE(THPGenerator_init(module));
1556
ASSERT_TRUE(THPException_init(module));
1557
THPSize_init(module);
1558
THPDtype_init(module);
1559
THPDTypeInfo_init(module);
1560
THPLayout_init(module);
1561
THPMemoryFormat_init(module);
1562
THPQScheme_init(module);
1563
THPDevice_init(module);
1564
THPStream_init(module);
1565
ASSERT_TRUE(THPVariable_initModule(module));
1566
ASSERT_TRUE(THPFunction_initModule(module));
1567
ASSERT_TRUE(THPEngine_initModule(module));
1568
// NOTE: We need to be able to access OperatorExportTypes from ONNX for use in
1569
// the export side of JIT, so this ONNX init needs to appear before the JIT
1571
// -- Subsystem binding initializers. ----------------------------------------
torch::onnx::initONNXBindings(module);
1572
torch::autograd::initEnumTag(module);
1573
torch::jit::initJITBindings(module);
1574
torch::monitor::initMonitorBindings(module);
1575
torch::impl::dispatch::initDispatchBindings(module);
1576
torch::dynamo::initDynamoBindings(module);
1577
torch::functorch::impl::initFuncTorchBindings(module);
1578
torch::throughput_benchmark::initThroughputBenchmarkBindings(module);
1579
torch::autograd::initReturnTypes(module);
1580
torch::autograd::initNNFunctions(module);
1581
torch::autograd::initFFTFunctions(module);
1582
torch::autograd::initLinalgFunctions(module);
1583
torch::autograd::initNestedFunctions(module);
1584
torch::autograd::initSparseFunctions(module);
1585
torch::autograd::initSpecialFunctions(module);
1586
torch::autograd::init_legacy_variable(module);
1587
torch::profiler::initPythonBindings(module);
1588
torch::python::init_bindings(module);
1589
torch::lazy::initLazyBindings(module);
1590
torch::inductor::initAOTIRunnerBindings(module);
1592
torch::profiler::initIttBindings(module);
1595
torch::cuda::initModule(module);
1598
torch::xpu::initModule(module);
1600
torch::cpu::initModule(module);
1601
torch::initVerboseBindings(module);
1602
ASSERT_TRUE(THPStorage_init(module));
1605
// This will only initialise base classes and attach them to library namespace
1606
// They won't be ready for real usage until importing cuda module, that will
1607
// complete the process (but it defines Python classes before calling back
1608
// into C, so these lines have to execute first)..
1609
THCPStream_init(module);
1610
THCPEvent_init(module);
1611
THCPGraph_init(module);
1615
THXPStream_init(module);
1616
THXPEvent_init(module);
1619
// Helper: attach an attribute to torch._C. PyModule_AddObject steals the
// reference on success, hence the incref flag.
auto set_module_attr =
1620
[&](const char* name, PyObject* v, bool incref = true) {
1621
// PyModule_AddObject steals reference
1626
int ret = PyModule_AddObject(module, name, v);
1634
// -- Build-configuration feature flags exposed to Python. -------------------
#if defined(USE_CUDNN) || defined(USE_ROCM)
1635
PyObject* has_cudnn = Py_True;
1637
PyObject* has_cudnn = Py_False;
1639
ASSERT_TRUE(set_module_attr("_has_cudnn", has_cudnn));
1641
#if AT_MKL_ENABLED() || AT_POCKETFFT_ENABLED()
1642
PyObject* has_spectral = Py_True;
1644
PyObject* has_spectral = Py_False;
1646
ASSERT_TRUE(set_module_attr("has_spectral", has_spectral));
1648
// force ATen to initialize because it handles
1649
// setting up TH Errors so that they throw C++ exceptions
1652
// Automatically translate errors thrown from pybind11 functions
1653
py::register_exception_translator([](std::exception_ptr e) { // NOLINT
1656
std::rethrow_exception(e);
1662
// -- pybind11-level bindings on the same module. ----------------------------
auto py_module = py::reinterpret_borrow<py::module>(module);
1663
py_module.def("_demangle", &c10::demangle);
1664
py_module.def("_log_api_usage_once", &LogAPIUsageOnceFromPython);
1665
py_module.def("_log_api_usage_metadata", &LogAPIUsageMetadataFromPython);
1667
py_module.def("vitals_enabled", &at::vitals::torchVitalEnabled);
1670
[](const std::string& vital,
1671
const std::string& attr,
1672
const std::string& value) {
1673
return at::vitals::VitalsAPI.setVital(vital, attr, value);
1676
"read_vitals", []() { return at::vitals::VitalsAPI.readVitals(); });
1680
torch::wrap_pybind_function(at::init_num_threads),
1684
Initializes the number of parallel threads used on the current thread.
1686
Call this whenever a new thread is created in order to propagate values from
1687
:func:`torch.set_num_threads` onto the new thread.
1691
set_module_attr("has_openmp", at::hasOpenMP() ? Py_True : Py_False));
1692
ASSERT_TRUE(set_module_attr("has_mkl", at::hasMKL() ? Py_True : Py_False));
1694
set_module_attr("has_lapack", at::hasLAPACK() ? Py_True : Py_False));
1696
py_module.def("_valgrind_supported_platform", []() {
1697
#if defined(USE_VALGRIND)
1704
py_module.def("_valgrind_toggle", []() {
1705
#if defined(USE_VALGRIND)
1706
CALLGRIND_TOGGLE_COLLECT;
1708
TORCH_CHECK(false, "Valgrind is not supported.");
1712
py_module.def("_valgrind_toggle_and_dump_stats", []() {
1713
#if defined(USE_VALGRIND)
1714
// NB: If we don't toggle collect around dump stats, callgrind_annotate
1715
// won't process the results correctly. Specifically,
1716
// `callgrind_annotate --inclusive=no` will be almost completely empty.
1717
CALLGRIND_TOGGLE_COLLECT;
1718
CALLGRIND_DUMP_STATS;
1720
TORCH_CHECK(false, "Valgrind is not supported.");
1724
py::class_<WeakTensorRef>(py_module, "_WeakTensorRef")
1725
.def(py::init([](py::object tensor) {
1726
return WeakTensorRef(THPVariable_Unpack(tensor.ptr()));
1728
.def("expired", &WeakTensorRef::expired);
1730
py::enum_<at::native::ConvBackend>(py_module, "_ConvBackend")
1731
.value("CudaDepthwise2d", at::native::ConvBackend::CudaDepthwise2d)
1732
.value("CudaDepthwise3d", at::native::ConvBackend::CudaDepthwise3d)
1733
.value("Cudnn", at::native::ConvBackend::Cudnn)
1734
.value("CudnnTranspose", at::native::ConvBackend::CudnnTranspose)
1735
.value("Empty", at::native::ConvBackend::Empty)
1736
.value("Miopen", at::native::ConvBackend::Miopen)
1737
.value("MiopenDepthwise", at::native::ConvBackend::MiopenDepthwise)
1738
.value("MiopenTranspose", at::native::ConvBackend::MiopenTranspose)
1739
.value("Mkldnn", at::native::ConvBackend::Mkldnn)
1740
.value("MkldnnEmpty", at::native::ConvBackend::MkldnnEmpty)
1741
.value("NnpackSpatial", at::native::ConvBackend::NnpackSpatial)
1742
.value("Overrideable", at::native::ConvBackend::Overrideable)
1743
.value("Slow2d", at::native::ConvBackend::Slow2d)
1744
.value("Slow3d", at::native::ConvBackend::Slow3d)
1745
.value("SlowDilated2d", at::native::ConvBackend::SlowDilated2d)
1746
.value("SlowDilated3d", at::native::ConvBackend::SlowDilated3d)
1747
.value("SlowTranspose2d", at::native::ConvBackend::SlowTranspose2d)
1748
.value("SlowTranspose3d", at::native::ConvBackend::SlowTranspose3d)
1750
"Winograd3x3Depthwise", at::native::ConvBackend::Winograd3x3Depthwise)
1751
.value("Xnnpack2d", at::native::ConvBackend::Xnnpack2d)
1752
.value("Mps", at::native::ConvBackend::Mps)
1753
// NOTE(review): the trailing comma inside the "MpsTranspose," enum name
// looks like a typo — confirm against upstream before fixing.
.value("MpsTranspose,", at::native::ConvBackend::MpsTranspose);
1756
"_select_conv_backend",
1757
[](const at::Tensor& input,
1758
const at::Tensor& weight,
1759
const c10::optional<at::Tensor>& bias_opt,
1760
at::SymIntArrayRef stride_,
1761
at::SymIntArrayRef padding_,
1762
at::SymIntArrayRef dilation_,
1764
at::SymIntArrayRef output_padding_,
1765
c10::SymInt groups_) {
1766
return at::native::select_conv_backend(
1783
py::arg("dilation"),
1784
py::arg("transposed"),
1785
py::arg("output_padding"),
1788
// overload for bias_sizes_opt/backward TODO: figure out default value
1790
"_select_conv_backend",
1791
[](const at::Tensor& input,
1792
const at::Tensor& weight,
1793
const c10::optional<at::Tensor>& bias,
1794
at::SymIntArrayRef stride_,
1795
at::SymIntArrayRef padding_,
1796
at::SymIntArrayRef dilation_,
1798
at::SymIntArrayRef output_padding_,
1799
c10::SymInt groups_,
1800
c10::optional<std::vector<c10::SymInt>> bias_sizes_opt) {
1801
c10::OptionalArrayRef<c10::SymInt> ref = c10::nullopt;
1802
if (bias_sizes_opt) {
1803
ref = (*bias_sizes_opt);
1805
return at::native::select_conv_backend(
1822
py::arg("dilation"),
1823
py::arg("transposed"),
1824
py::arg("output_padding"),
1826
py::arg("bias_sizes"));
1829
"_conv_determine_backend_memory_format",
1830
at::native::_determine_backend_memory_format);
1832
////////////////////////////////////////////////////////////////////////////////
1833
// Scaled Dot Product Attention utilities
1834
////////////////////////////////////////////////////////////////////////////////
1835
py::class_<sdp::sdp_params>(py_module, "_SDPAParams")
1836
.def(py::init([](at::Tensor const& query,
1837
at::Tensor const& key,
1838
at::Tensor const& value,
1839
c10::optional<at::Tensor> attn_mask,
1842
return sdp::sdp_params{
1843
query, key, value, std::move(attn_mask), dropout, is_causal};
1845
.def_readonly("query", &sdp::sdp_params::query)
1846
.def_readonly("key", &sdp::sdp_params::key)
1847
.def_readonly("value", &sdp::sdp_params::value)
1848
.def_readonly("attn_mask", &sdp::sdp_params::attn_mask)
1849
.def_readonly("dropout", &sdp::sdp_params::dropout)
1850
.def_readonly("is_causal", &sdp::sdp_params::is_causal);
1852
py::enum_<sdp::SDPBackend>(
1855
"An enum-like class that contains the different backends for scaled dot product attention.\n\n... warning:: This class is in beta and subject to change.\n\n"
1856
"This backend class is designed to be used with the sdpa_kernel context manager."
1857
"See :func: torch.nn.attention.sdpa_kernel for more details.")
1858
.value("ERROR", sdp::SDPBackend::error)
1859
.value("MATH", sdp::SDPBackend::math)
1860
.value("FLASH_ATTENTION", sdp::SDPBackend::flash_attention)
1861
.value("EFFICIENT_ATTENTION", sdp::SDPBackend::efficient_attention)
1862
.value("CUDNN_ATTENTION", sdp::SDPBackend::cudnn_attention);
1865
"_can_use_flash_attention",
1866
[](const sdp::sdp_params& params, bool debug) {
1868
return sdp::can_use_flash_attention(params, debug);
1874
"_can_use_mem_efficient_attention",
1875
[](const sdp::sdp_params& params, bool debug) {
1877
return sdp::can_use_mem_efficient_attention(params, debug);
1883
py::enum_<at::LinalgBackend>(py_module, "_LinalgBackend")
1884
.value("Default", at::LinalgBackend::Default)
1885
.value("Cusolver", at::LinalgBackend::Cusolver)
1886
.value("Magma", at::LinalgBackend::Magma);
1888
py_module.def("_set_linalg_preferred_backend", [](at::LinalgBackend b) {
1889
at::globalContext().setLinalgPreferredBackend(b);
1891
py_module.def("_get_linalg_preferred_backend", []() {
1892
return at::globalContext().linalgPreferredBackend();
1896
"_construct_storage_from_data_pointer",
1897
[](int64_t data_ptr, c10::Device device, size_t size_bytes) {
1898
return c10::Storage(
1899
c10::Storage::use_byte_size_t(),
1901
// NOLINTNEXTLINE(performance-no-int-to-ptr)
1902
at::DataPtr(reinterpret_cast<void*>(data_ptr), device));
1906
// -- Thread-local Python-object stash helpers. ------------------------------
"_stash_obj_in_tls", [](const std::string& key, py::handle arg) {
1907
at::impl::ThreadLocalPythonObjects::get_state().set(
1909
std::make_shared<c10::SafePyObject>(arg.ptr(), getPyInterpreter()));
1912
py_module.def("_get_obj_in_tls", [](const std::string& key) -> py::handle {
1913
auto safe_pyobject =
1914
at::impl::ThreadLocalPythonObjects::get_state().get(key);
1915
auto obj = safe_pyobject->ptr(getPyInterpreter());
1916
return py::handle(obj);
1919
py_module.def("_is_key_in_tls", [](const std::string& key) -> bool {
1920
return at::impl::ThreadLocalPythonObjects::get_state().contains(key);
1924
// -- More build-configuration flags (guards dropped by the dump). -----------
PyObject* has_cuda = Py_True;
1926
PyObject* has_cuda = Py_False;
1930
PyObject* has_mps = Py_True;
1932
PyObject* has_mps = Py_False;
1936
PyObject* has_xpu = Py_True;
1938
PyObject* has_xpu = Py_False;
1941
ASSERT_TRUE(set_module_attr("_has_cuda", has_cuda));
1943
set_module_attr("_has_magma", at::hasMAGMA() ? Py_True : Py_False));
1944
ASSERT_TRUE(set_module_attr("_has_mps", has_mps));
1945
ASSERT_TRUE(set_module_attr("_has_xpu", has_xpu));
1947
set_module_attr("_has_mkldnn", at::hasMKLDNN() ? Py_True : Py_False));
1949
#ifdef _GLIBCXX_USE_CXX11_ABI
1950
ASSERT_TRUE(set_module_attr(
1951
"_GLIBCXX_USE_CXX11_ABI", _GLIBCXX_USE_CXX11_ABI ? Py_True : Py_False));
1953
ASSERT_TRUE(set_module_attr("_GLIBCXX_USE_CXX11_ABI", Py_False));
1956
// See note [Pybind11 ABI constants]
1957
#define SET_STR_DEFINE(name) \
1958
ASSERT_TRUE(set_module_attr("_" #name, THPUtils_packString(name)))
1960
#ifdef PYBIND11_COMPILER_TYPE
1961
SET_STR_DEFINE(PYBIND11_COMPILER_TYPE);
1964
set_module_attr("_" C10_STRINGIZE(PYBIND11_COMPILER_TYPE), Py_None));
1967
#ifdef PYBIND11_STDLIB
1968
SET_STR_DEFINE(PYBIND11_STDLIB);
1970
ASSERT_TRUE(set_module_attr("_" C10_STRINGIZE(PYBIND11_STDLIB), Py_None));
1973
#ifdef PYBIND11_BUILD_ABI
1974
SET_STR_DEFINE(PYBIND11_BUILD_ABI);
1976
ASSERT_TRUE(set_module_attr("_" C10_STRINGIZE(PYBIND11_BUILD_ABI), Py_None));
1978
#undef SET_STR_DEFINE
1981
"_set_conj", [](const at::Tensor& x, bool conj) { x._set_conj(conj); });
1983
"_set_neg", [](const at::Tensor& x, bool neg) { x._set_neg(neg); });
1984
py_module.def("_get_tensor_metadata", &torch::jit::getTensorMetadata);
1986
"_set_tensor_metadata",
1987
static_cast<void (*)(
1988
const at::Tensor&, std::unordered_map<std::string, bool>)>(
1989
torch::jit::setTensorMetadata));
1990
py_module.def("_dispatch_key_set", [](const at::Tensor& x) {
1991
return toString(x.key_set());
1994
"_has_storage", [](const at::Tensor& x) { return x.has_storage(); });
1996
// -- TLS dispatch-key introspection helpers. --------------------------------
py_module.def("_set_meta_in_tls_dispatch_include", [](bool meta_in_tls) {
1997
auto local_keyset = c10::impl::tls_local_dispatch_key_set();
1998
c10::DispatchKeySet key_set({at::DispatchKey::Meta});
2000
local_keyset.included_ = local_keyset.included_ | key_set;
2002
local_keyset.included_ =
2003
local_keyset.included_.remove_backend(c10::BackendComponent::MetaBit);
2005
c10::impl::_force_tls_local_dispatch_key_set(local_keyset);
2008
py_module.def("_meta_in_tls_dispatch_include", []() {
2009
auto local_keyset = c10::impl::tls_local_dispatch_key_set();
2010
return local_keyset.included_.has_backend(c10::BackendComponent::MetaBit);
2013
py_module.def("_dump_local_tls_set", []() {
2014
auto local_keyset = c10::impl::tls_local_dispatch_key_set();
2015
std::cout << "Included: " << toString(local_keyset.included_) << "\n";
2016
std::cout << "Excluded: " << toString(local_keyset.excluded_) << "\n";
2020
"_should_allow_numbers_as_tensors", [](const std::string& name) {
2021
return torch::should_allow_numbers_as_tensors(name);
2024
// FIXME(crcrpar): Better to have `at::ScalarType` get mapped to `torch.dtype`
2025
// Currently I see the second item of the key is displayed as
2026
// e.g. `torch._C._te.ScalarType at 0x7fcf318adab0`
2027
// I thought adding an appropriate type_caster of `at::ScalarType` to
2028
// torch/csrc/pybind.h` would solve this but it caused segmentation fault in
2030
using _DeviceDtypeKey = std::pair<at::Device, std::string>;
2031
// Custom hasher is necessary to make unordered_map compilable for Windows
2032
// debug targets. As `at::native::ParamsHash` only works on structs with
2033
// standard layout, but std::string isn't one in Visual C++ debug builds,
2034
// which one can easily verify by running something like:
2036
// #include <type_traits>
2037
// #include <string>
2038
// static_assert(std::is_standard_layout_v<std::string>, "Oh noes");
2039
// If above condition is not met, VC++ raises a very cryptic compilation
2041
// https://github.com/pytorch/pytorch/pull/100007#discussion_r1227116292 for
2043
struct _DeviceDtypeHasher {
2044
std::size_t operator()(const _DeviceDtypeKey& k) const noexcept {
2045
static at::native::ParamsHash<at::Device> device_hasher;
2046
static std::hash<std::string> string_hasher;
2047
return device_hasher(k.first) ^ string_hasher(k.second);
2050
using _FlatMap = std::unordered_map<
2052
at::native::TensorsAndIndicesT,
2053
_DeviceDtypeHasher>;
2055
"_group_tensors_by_device_and_dtype",
2056
[](const std::vector<std::vector<c10::optional<at::Tensor>>>&
2058
const bool with_indices) {
2060
for (const auto& iter :
2061
at::native::_group_tensors_by_first_tensors_device_and_dtype(
2062
nested_tensorlist, with_indices)) {
2063
const auto scalar_type_name =
2064
torch::utils::getDtypeNames(iter.first.second).first;
2065
map.insert({{iter.first.first, scalar_type_name}, iter.second});
2072
[](const at::Tensor& tensor) {
2073
return reinterpret_cast<std::intptr_t>(
2074
tensor.storage().unsafeGetStorageImpl());
2076
"Gets the memory address of the Tensor's StorageImpl.");
2080
[](const at::Tensor& tensor) {
2081
return reinterpret_cast<std::intptr_t>(tensor.storage().data());
2083
"Gets the memory address of the Tensor's data pointer.");
2087
[](const at::Tensor& tensor) {
2088
return c10::impl::cow::is_cow_data_ptr(tensor.storage().data_ptr());
2090
"Checks if a tensor's data pointer is COW");
2092
// -- Default CPU generator and torch-function machinery hookup. -------------
const auto& defaultGenerator = at::detail::getDefaultCPUGenerator();
2093
THPDefaultCPUGenerator =
2094
(THPGenerator*)THPGenerator_initDefaultGenerator(defaultGenerator);
2095
// This reference is meant to be given away, so no need to incref here.
2096
ASSERT_TRUE(set_module_attr(
2097
"default_generator",
2098
(PyObject*)THPDefaultCPUGenerator,
2099
/* incref= */ false));
2100
ASSERT_TRUE(set_module_attr(
2101
"DisableTorchFunctionSubclass",
2102
(PyObject*)THPModule_DisableTorchFunctionSubclassType(),
2103
/* incref= */ false));
2104
ASSERT_TRUE(set_module_attr(
2105
"DisableTorchFunction",
2106
(PyObject*)THPModule_DisableTorchFunctionType(),
2107
/* incref= */ false));
2108
torch::set_disabled_torch_function_impl(
2109
PyObject_GetAttrString(module, "_disabled_torch_function_impl"));
2110
ASSERT_TRUE(torch::disabled_torch_function_impl() != nullptr);
2111
torch::set_disabled_torch_dispatch_impl(
2112
PyObject_GetAttrString(module, "_disabled_torch_dispatch_impl"));
2113
ASSERT_TRUE(torch::disabled_torch_dispatch_impl() != nullptr);
2115
END_HANDLE_TH_ERRORS
2118
// Checks that the _C shared library isn't initialized multiple times. This
2119
// can happen if the same csrc files are compiled into multiple shared
2121
inline void pytorch_duplicate_guard() {
2122
static int initialized = 0;
2124
fmt::print(stderr, "pytorch: _C shared library re-initialized\n");
2131
struct call_duplicate_guard {
2132
call_duplicate_guard() {
2133
pytorch_duplicate_guard();
2137
static call_duplicate_guard _call_duplicate_guard;