15
from collections import defaultdict
16
from contextlib import ExitStack
17
from datetime import datetime
18
from pathlib import Path
19
from typing import Any, cast, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union
24
import torch.distributed as dist
25
from torch.multiprocessing import current_process, get_context
26
from torch.testing._internal.common_utils import (
38
TEST_WITH_SLOW_GRADCHECK,
43
REPO_ROOT = Path(__file__).resolve().parent.parent
44
sys.path.insert(0, str(REPO_ROOT))
46
from tools.stats.import_test_stats import (
47
ADDITIONAL_CI_FILES_FOLDER,
48
TEST_CLASS_TIMES_FILE,
51
from tools.stats.upload_metrics import add_global_metric, emit_metric
52
from tools.testing.discover_tests import (
59
from tools.testing.do_target_determination_for_s3 import import_results
60
from tools.testing.target_determination.gen_artifact import gen_ci_artifact
61
from tools.testing.target_determination.heuristics.previously_failed_in_pr import (
62
gen_additional_test_failures_file,
64
from tools.testing.target_determination.heuristics.utils import get_pr_number
65
from tools.testing.test_run import TestRun
66
from tools.testing.test_selections import (
68
get_test_case_configs,
76
sys.path.remove(str(REPO_ROOT))
79
HAVE_TEST_SELECTION_TOOLS = True
80
TEST_CONFIG = os.getenv("TEST_CONFIG", "")
81
BUILD_ENVIRONMENT = os.getenv("BUILD_ENVIRONMENT", "")
82
RERUN_DISABLED_TESTS = os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1"
83
DISTRIBUTED_TEST_PREFIX = "distributed"
84
INDUCTOR_TEST_PREFIX = "inductor"
85
IS_SLOW = "slow" in TEST_CONFIG or "slow" in BUILD_ENVIRONMENT
99
def maybe_set_hip_visible_devies():
101
if torch.version.hip:
102
p = current_process()
103
if p.name != "MainProcess":
105
os.environ["HIP_VISIBLE_DEVICES"] = str(p._identity[0] % NUM_PROCS)
109
return s.lower() not in {"", "0", "false", "off"}
112
class TestChoices(list):
    """List subclass used as the argparse ``choices`` container for test names.

    Membership tests normalize the candidate through ``parse_test_module``
    before comparing, so e.g. a dotted test identifier matches its module
    entry in the list.
    """

    def __init__(self, *args, **kwargs):
        # Only the first positional argument (the iterable of test names) is
        # consumed; any extra positionals/kwargs argparse passes are ignored.
        super().__init__(args[0])

    def __contains__(self, item):
        # NOTE(review): parse_test_module is defined elsewhere in this file;
        # presumably it strips a test id down to its module name — confirm.
        return list.__contains__(self, parse_test_module(item))
120
FSDP_TEST = [test for test in TESTS if test.startswith("distributed/fsdp")]
123
"distributed/nn/jit/test_instantiator",
124
"distributed/rpc/test_faulty_agent",
125
"distributed/rpc/test_tensorpipe_agent",
126
"distributed/rpc/test_share_memory",
127
"distributed/rpc/cuda/test_tensorpipe_agent",
128
"distributed/pipeline/sync/skip/test_api",
129
"distributed/pipeline/sync/skip/test_gpipe",
130
"distributed/pipeline/sync/skip/test_inspect_skip_layout",
131
"distributed/pipeline/sync/skip/test_leak",
132
"distributed/pipeline/sync/skip/test_portal",
133
"distributed/pipeline/sync/skip/test_stash_pop",
134
"distributed/pipeline/sync/skip/test_tracker",
135
"distributed/pipeline/sync/skip/test_verify_skippables",
136
"distributed/pipeline/sync/test_balance",
137
"distributed/pipeline/sync/test_bugs",
138
"distributed/pipeline/sync/test_checkpoint",
139
"distributed/pipeline/sync/test_copy",
140
"distributed/pipeline/sync/test_deferred_batch_norm",
141
"distributed/pipeline/sync/test_dependency",
142
"distributed/pipeline/sync/test_inplace",
143
"distributed/pipeline/sync/test_microbatch",
144
"distributed/pipeline/sync/test_phony",
145
"distributed/pipeline/sync/test_pipe",
146
"distributed/pipeline/sync/test_pipeline",
147
"distributed/pipeline/sync/test_stream",
148
"distributed/pipeline/sync/test_transparency",
149
"distributed/pipeline/sync/test_worker",
150
"distributed/elastic/agent/server/test/api_test",
151
"distributed/elastic/multiprocessing/api_test",
152
# BUG FIX: trailing commas were missing here, so Python implicitly
# concatenated these adjacent string literals (together with the next
# entry) into one bogus test name — none of the intended tests were
# actually matched by this blocklist.
"distributed/_shard/checkpoint/test_checkpoint",
"distributed/_shard/checkpoint/test_file_system_checkpoint",
154
"distributed/_shard/sharding_spec/test_sharding_spec",
155
"distributed/_shard/sharding_plan/test_sharding_plan",
156
"distributed/_shard/sharded_tensor/test_sharded_tensor",
157
"distributed/_shard/sharded_tensor/test_sharded_tensor_reshard",
158
"distributed/_shard/sharded_tensor/ops/test_embedding",
159
"distributed/_shard/sharded_tensor/ops/test_embedding_bag",
160
"distributed/_shard/sharded_tensor/ops/test_binary_cmp",
161
"distributed/_shard/sharded_tensor/ops/test_init",
162
"distributed/_shard/sharded_optim/test_sharded_optim",
166
"distributed/rpc/test_faulty_agent",
167
"distributed/rpc/test_tensorpipe_agent",
168
"distributed/rpc/test_share_memory",
169
"distributed/rpc/cuda/test_tensorpipe_agent",
170
# BUG FIX: trailing commas were missing here, so Python implicitly
# concatenated these adjacent string literals (together with the next
# entry) into one bogus test name — none of the intended tests were
# actually matched by this blocklist.
"distributed/_shard/checkpoint/test_checkpoint",
"distributed/_shard/checkpoint/test_file_system_checkpoint",
172
"distributed/_shard/sharding_spec/test_sharding_spec",
173
"distributed/_shard/sharding_plan/test_sharding_plan",
174
"distributed/_shard/sharded_tensor/test_sharded_tensor",
175
"distributed/_shard/sharded_tensor/test_sharded_tensor_reshard",
176
"distributed/_shard/sharded_tensor/ops/test_embedding",
177
"distributed/_shard/sharded_tensor/ops/test_embedding_bag",
178
"distributed/_shard/sharded_tensor/ops/test_binary_cmp",
179
"distributed/_shard/sharded_tensor/ops/test_init",
180
"distributed/_shard/sharded_optim/test_sharded_optim",
181
"test_determination",
183
"test_cuda_nvml_based_avail",
184
"test_jit_cuda_fuser",
185
"distributed/_tensor/test_attention",
191
"profiler/test_cpp_thread",
192
"profiler/test_execution_trace",
193
"profiler/test_memory_profiler",
194
"profiler/test_profiler",
195
"profiler/test_profiler_tree",
196
"profiler/test_record_function",
197
"profiler/test_torch_tidy",
205
RUN_PARALLEL_BLOCKLIST = [
206
"test_cpp_extensions_jit",
207
"test_cpp_extensions_open_device_registration",
208
"test_cpp_extensions_stream_and_event",
209
"test_cpp_extensions_mtia_backend",
211
"test_mobile_optimizer",
212
"test_multiprocessing",
213
"test_multiprocessing_spawn",
214
"test_namedtuple_return_api",
218
"test_cuda_primary_ctx",
220
"inductor/test_benchmark_fusion",
221
"test_cuda_nvml_based_avail",
223
"test_autograd_fallback",
231
"test_cpp_api_parity",
234
"test_cpp_extensions_jit",
236
"test_tensor_creation_ops",
238
"test_python_dispatch",
241
"nn/test_convolution",
242
"distributions/test_distributions",
244
"functorch/test_memory_efficient_fusion",
246
"test_sort_and_select",
247
"test_backward_compatible_arguments",
251
"inductor/test_max_autotune",
252
"inductor/test_cutlass_backend",
253
"inductor/test_flex_attention",
258
"onnx/test_models_quantized_onnxruntime",
259
"onnx/test_models_onnxruntime",
260
"onnx/test_custom_ops",
261
"onnx/test_utility_funs",
267
"test_autograd_fallback",
271
"test_ops_gradients",
272
"test_ops_fwd_gradients",
279
SLOW_TEST_THRESHOLD = 300
281
DISTRIBUTED_TESTS_CONFIG = {}
284
if dist.is_available():
285
DISTRIBUTED_TESTS_CONFIG["test"] = {"WORLD_SIZE": "1"}
286
if not TEST_WITH_ROCM and dist.is_mpi_available():
287
DISTRIBUTED_TESTS_CONFIG["mpi"] = {
289
"TEST_REPORT_SOURCE_OVERRIDE": "dist-mpi",
291
if dist.is_nccl_available():
292
DISTRIBUTED_TESTS_CONFIG["nccl"] = {
293
"WORLD_SIZE": "2" if torch.cuda.device_count() == 2 else "3",
294
"TEST_REPORT_SOURCE_OVERRIDE": "dist-nccl",
296
if dist.is_gloo_available():
297
DISTRIBUTED_TESTS_CONFIG["gloo"] = {
298
"WORLD_SIZE": "2" if torch.cuda.device_count() == 2 else "3",
299
"TEST_REPORT_SOURCE_OVERRIDE": "dist-gloo",
301
if dist.is_ucc_available():
302
DISTRIBUTED_TESTS_CONFIG["ucc"] = {
303
"WORLD_SIZE": "2" if torch.cuda.device_count() == 2 else "3",
304
"TEST_REPORT_SOURCE_OVERRIDE": "dist-ucc",
305
"UCX_TLS": "tcp,cuda",
306
"UCC_TLS": "nccl,ucp,cuda",
307
"UCC_TL_UCP_TUNE": "cuda:0",
308
"UCC_EC_CUDA_USE_COOPERATIVE_LAUNCH": "n",
312
SIGNALS_TO_NAMES_DICT = {
313
getattr(signal, n): n for n in dir(signal) if n.startswith("SIG") and "_" not in n
316
CPP_EXTENSIONS_ERROR = """
317
Ninja (https://ninja-build.org) is required for some of the C++ extensions
318
tests, but it could not be found. Install ninja with `pip install ninja`
319
or `conda install ninja`. Alternatively, disable said tests with
320
`run_test.py --exclude test_cpp_extensions_aot_ninja test_cpp_extensions_jit`.
323
PYTORCH_COLLECT_COVERAGE = bool(os.environ.get("PYTORCH_COLLECT_COVERAGE"))
325
JIT_EXECUTOR_TESTS = [
326
"test_jit_profiling",
328
"test_jit_fuser_legacy",
331
INDUCTOR_TESTS = [test for test in TESTS if test.startswith(INDUCTOR_TEST_PREFIX)]
332
DISTRIBUTED_TESTS = [test for test in TESTS if test.startswith(DISTRIBUTED_TEST_PREFIX)]
333
TORCH_EXPORT_TESTS = [test for test in TESTS if test.startswith("export")]
334
FUNCTORCH_TESTS = [test for test in TESTS if test.startswith("functorch")]
335
ONNX_TESTS = [test for test in TESTS if test.startswith("onnx")]
336
CPP_TESTS = [test for test in TESTS if test.startswith(CPP_TEST_PREFIX)]
338
TESTS_REQUIRING_LAPACK = [
339
"distributions/test_constraints",
340
"distributions/test_distributions",
344
TESTS_NOT_USING_GRADCHECK = [
352
"test_cpp_extensions_jit",
356
"dynamo/test_recompile_ux",
357
"inductor/test_smoke",
362
def print_to_stderr(message):
    """Echo *message* (plus a newline) on the standard error stream.

    Test-runner chatter goes to stderr so it does not pollute stdout,
    which may be consumed by tooling.
    """
    sys.stderr.write(f"{message}\n")
366
def get_executable_command(options, disable_coverage=False, is_cpp_test=False):
367
if options.coverage and not disable_coverage:
369
executable = ["coverage", "run", "--parallel-mode", "--source=torch"]
375
executable = [sys.executable, "-bb"]
377
executable = ["pytest"]
383
test_module: ShardedTest,
387
extra_unittest_args=None,
391
scribe_token = os.getenv("SCRIBE_GRAPHQL_ACCESS_TOKEN", "")
393
print_to_stderr("SCRIBE_GRAPHQL_ACCESS_TOKEN is set")
395
print_to_stderr("SCRIBE_GRAPHQL_ACCESS_TOKEN is NOT set")
397
env = env or os.environ.copy()
398
maybe_set_hip_visible_devies()
399
unittest_args = options.additional_args.copy()
400
test_file = test_module.name
401
stepcurrent_key = test_file
403
is_distributed_test = test_file.startswith(DISTRIBUTED_TEST_PREFIX)
404
is_cpp_test = test_file.startswith(CPP_TEST_PREFIX)
408
if is_cpp_test and RERUN_DISABLED_TESTS:
410
"Skipping C++ tests when running under RERUN_DISABLED_TESTS mode"
415
stepcurrent_key = f"{test_file}_{os.urandom(8).hex()}"
417
unittest_args.extend(
419
f"--shard-id={test_module.shard}",
420
f"--num-shards={test_module.num_shards}",
423
stepcurrent_key = f"{test_file}_{test_module.shard}_{os.urandom(8).hex()}"
426
unittest_args.append(f'-{"v" * options.verbose}')
428
if test_file in RUN_PARALLEL_BLOCKLIST:
430
arg for arg in unittest_args if not arg.startswith("--run-parallel")
433
if extra_unittest_args:
434
assert isinstance(extra_unittest_args, list)
435
unittest_args.extend(extra_unittest_args)
439
unittest_args.extend(
442
is_cpp_test=is_cpp_test,
443
is_distributed_test=is_distributed_test,
446
unittest_args.extend(test_module.get_pytest_args())
447
replacement = {"-f": "-x"}
448
unittest_args = [replacement.get(arg, arg) for arg in unittest_args]
450
if options.showlocals:
452
unittest_args.extend(["--showlocals", "--tb=long", "--color=yes"])
454
unittest_args.append("--locals")
458
if IS_CI and not is_cpp_test:
459
ci_args = ["--import-slow-tests", "--import-disabled-tests"]
460
if RERUN_DISABLED_TESTS:
461
ci_args.append("--rerun-disabled-tests")
463
unittest_args.extend(ci_args)
465
if test_file in PYTEST_SKIP_RETRIES:
466
if not options.pytest:
468
"A test running without pytest cannot skip retries using "
469
"the PYTEST_SKIP_RETRIES set."
471
unittest_args = [arg for arg in unittest_args if "--reruns" not in arg]
474
executable = get_executable_command(options, is_cpp_test=is_cpp_test)
480
if test_file.startswith(CPP_TEST_PREFIX):
483
cpp_test = os.path.join(
485
test_file.replace(f"{CPP_TEST_PREFIX}/", ""),
488
cpp_test = os.path.join(
489
Path(test_directory).parent,
491
test_file.replace(f"{CPP_TEST_PREFIX}/", ""),
495
cpp_test if sys.platform != "win32" else cpp_test + ".exe"
500
argv = [test_file + ".py"] + unittest_args
502
os.makedirs(REPO_ROOT / "test" / "test-reports", exist_ok=True)
503
if options.pipe_logs:
504
log_fd, log_path = tempfile.mkstemp(
505
dir=REPO_ROOT / "test" / "test-reports",
506
prefix=f"{sanitize_file_name(str(test_module))}_",
507
suffix="_toprint.log",
511
command = (launcher_cmd or []) + executable + argv
513
"--subprocess" not in command
514
and not RERUN_DISABLED_TESTS
516
and "-n" not in command
520
if not options.enable_timeout
525
and isinstance(test_module, ShardedTest)
526
and test_module.time is not None
531
print_to_stderr(f"Executing {command} ... [{datetime.now()}]")
533
with ExitStack() as stack:
535
if options.pipe_logs:
536
output = stack.enter_context(open(log_path, "w"))
539
ret_code, was_rerun = run_test_retries(
546
options.continue_through_error,
549
command.extend([f"--sc={stepcurrent_key}", "--print-items"])
550
ret_code, was_rerun = retry_shell(
566
ret_code = 0 if ret_code == 5 else ret_code
568
if options.pipe_logs and print_log:
570
test_module, log_path, failed=(ret_code != 0), was_rerun=was_rerun
575
def try_set_cpp_stack_traces(env, command, set=True):
578
env["TORCH_SHOW_CPP_STACKTRACES"] = "1" if set else "0"
589
continue_through_error,
603
def print_to_file(s):
604
print(s, file=output, flush=True)
606
num_failures = defaultdict(int)
608
print_items = ["--print-items"]
609
sc_command = f"--sc={stepcurrent_key}"
611
ret_code, _ = retry_shell(
612
command + [sc_command] + print_items,
620
ret_code = 0 if ret_code == 5 else ret_code
621
if ret_code == 0 and not sc_command.startswith("--rs="):
623
signal_name = f" ({SIGNALS_TO_NAMES_DICT[-ret_code]})" if ret_code < 0 else ""
624
print_to_file(f"Got exit code {ret_code}{signal_name}")
629
REPO_ROOT / ".pytest_cache/v/cache/stepcurrent" / stepcurrent_key
631
current_failure = f.read()
632
except FileNotFoundError:
634
"No stepcurrent file found. Either pytest didn't get to run (e.g. import error)"
635
+ " or file got deleted (contact dev infra)"
639
env = try_set_cpp_stack_traces(env, command, set=False)
641
num_failures[current_failure] += 1
646
sc_command = f"--scs={stepcurrent_key}"
648
"Test succeeeded in new process, continuing with the rest of the tests"
650
elif num_failures[current_failure] >= 3:
651
if not continue_through_error:
652
print_to_file("Stopping at first consistent failure")
654
sc_command = f"--scs={stepcurrent_key}"
656
"Test failed consistently, "
657
"continuing with the rest of the tests due to continue-through-error being set"
660
env = try_set_cpp_stack_traces(env, command, set=True)
661
sc_command = f"--rs={stepcurrent_key}"
662
print_to_file("Retrying single test...")
665
consistent_failures = [x[1:-1] for x in num_failures.keys() if num_failures[x] >= 3]
666
flaky_failures = [x[1:-1] for x in num_failures.keys() if 0 < num_failures[x] < 3]
667
if len(flaky_failures) > 0:
669
"The following tests failed and then succeeded when run in a new process"
670
+ f"{flaky_failures}",
672
if len(consistent_failures) > 0:
673
print_to_file(f"The following tests failed consistently: {consistent_failures}")
675
return ret_code, any(x > 0 for x in num_failures.values())
678
def run_test_with_subprocess(test_module, test_directory, options):
680
test_module, test_directory, options, extra_unittest_args=["--subprocess"]
684
def _test_cpp_extensions_aot(test_directory, options, use_ninja):
687
from torch.utils import cpp_extension
689
cpp_extension.verify_ninja_availability()
691
print_to_stderr(CPP_EXTENSIONS_ERROR)
695
cpp_extensions_test_dir = os.path.join(test_directory, "cpp_extensions")
696
cpp_extensions_test_build_dir = os.path.join(cpp_extensions_test_dir, "build")
697
if os.path.exists(cpp_extensions_test_build_dir):
698
shutil.rmtree(cpp_extensions_test_build_dir)
701
shell_env = os.environ.copy()
702
shell_env["USE_NINJA"] = str(1 if use_ninja else 0)
703
cmd = [sys.executable, "setup.py", "install", "--root", "./install"]
704
return_code = shell(cmd, cwd=cpp_extensions_test_dir, env=shell_env)
707
if sys.platform != "win32":
710
cwd=os.path.join(cpp_extensions_test_dir, "no_python_abi_suffix_test"),
717
python_path = os.environ.get("PYTHONPATH", "")
718
from shutil import copyfile
720
os.environ["USE_NINJA"] = shell_env["USE_NINJA"]
721
test_module = "test_cpp_extensions_aot" + ("_ninja" if use_ninja else "_no_ninja")
723
test_directory + "/test_cpp_extensions_aot.py",
724
test_directory + "/" + test_module + ".py",
727
cpp_extensions = os.path.join(test_directory, "cpp_extensions")
728
install_directory = ""
730
for root, directories, _ in os.walk(os.path.join(cpp_extensions, "install")):
731
for directory in directories:
732
if "-packages" in directory:
733
install_directory = os.path.join(root, directory)
735
assert install_directory, "install_directory must not be empty"
736
os.environ["PYTHONPATH"] = os.pathsep.join([install_directory, python_path])
737
return run_test(ShardedTest(test_module, 1, 1), test_directory, options)
739
os.environ["PYTHONPATH"] = python_path
740
if os.path.exists(test_directory + "/" + test_module + ".py"):
741
os.remove(test_directory + "/" + test_module + ".py")
742
os.environ.pop("USE_NINJA")
745
def test_cpp_extensions_aot_ninja(test_module, test_directory, options):
    # Custom handler: build the cpp_extensions test project with ninja
    # enabled and run the AOT extension tests against it. ``test_module``
    # is unused — the helper derives the module name from ``use_ninja``.
    return _test_cpp_extensions_aot(test_directory, options, use_ninja=True)
749
def test_cpp_extensions_aot_no_ninja(test_module, test_directory, options):
    # Custom handler: same as the ninja variant, but build the
    # cpp_extensions test project with ninja disabled (USE_NINJA=0).
    # ``test_module`` is unused — the helper derives the module name.
    return _test_cpp_extensions_aot(test_directory, options, use_ninja=False)
753
def test_autoload_enable(test_module, test_directory, options):
    # Custom handler: run the autoload test with device-backend autoloading
    # turned on (TORCH_DEVICE_BACKEND_AUTOLOAD=1). ``test_module`` is unused.
    return _test_autoload(test_directory, options, enable=True)
757
def test_autoload_disable(test_module, test_directory, options):
    # Custom handler: run the autoload test with device-backend autoloading
    # turned off (TORCH_DEVICE_BACKEND_AUTOLOAD=0). ``test_module`` is unused.
    return _test_autoload(test_directory, options, enable=False)
761
def _test_autoload(test_directory, options, enable=True):
763
cpp_extensions_test_dir = os.path.join(test_directory, "cpp_extensions")
764
cpp_extensions_test_build_dir = os.path.join(cpp_extensions_test_dir, "build")
765
if os.path.exists(cpp_extensions_test_build_dir):
766
shutil.rmtree(cpp_extensions_test_build_dir)
769
cmd = [sys.executable, "setup.py", "install", "--root", "./install"]
770
return_code = shell(cmd, cwd=cpp_extensions_test_dir, env=os.environ)
775
python_path = os.environ.get("PYTHONPATH", "")
778
cpp_extensions = os.path.join(test_directory, "cpp_extensions")
779
install_directory = ""
781
for root, directories, _ in os.walk(os.path.join(cpp_extensions, "install")):
782
for directory in directories:
783
if "-packages" in directory:
784
install_directory = os.path.join(root, directory)
786
assert install_directory, "install_directory must not be empty"
787
os.environ["PYTHONPATH"] = os.pathsep.join([install_directory, python_path])
788
os.environ["TORCH_DEVICE_BACKEND_AUTOLOAD"] = str(int(enable))
790
cmd = [sys.executable, "test_autoload.py"]
791
return_code = shell(cmd, cwd=test_directory, env=os.environ)
794
os.environ["PYTHONPATH"] = python_path
795
os.environ.pop("TORCH_DEVICE_BACKEND_AUTOLOAD")
798
def test_distributed(test_module, test_directory, options):
800
mpi_available = subprocess.call(
801
"command -v mpiexec", shell=True
802
) == 0 and sys.version_info < (3, 9)
803
if options.verbose and not mpi_available:
804
print_to_stderr("MPI not available -- MPI backend tests will be skipped")
806
config = DISTRIBUTED_TESTS_CONFIG
807
for backend, env_vars in config.items():
808
if sys.platform == "win32" and backend != "gloo":
810
if backend == "mpi" and not mpi_available:
812
for with_init_file in {True, False}:
813
if sys.platform == "win32" and not with_init_file:
815
tmp_dir = tempfile.mkdtemp()
817
init_str = "with {} init_method"
818
with_init = init_str.format("file" if with_init_file else "env")
820
f"Running distributed tests for the {backend} backend {with_init}"
822
old_environ = dict(os.environ)
823
os.environ["TEMP_DIR"] = tmp_dir
824
os.environ["BACKEND"] = backend
825
os.environ.update(env_vars)
827
os.mkdir(os.path.join(tmp_dir, "barrier"))
828
os.mkdir(os.path.join(tmp_dir, "test_dir"))
831
with open(os.devnull, "w") as devnull:
832
allowrunasroot_opt = (
833
"--allow-run-as-root"
835
'mpiexec --allow-run-as-root -n 1 bash -c ""',
838
stderr=subprocess.STDOUT,
846
f'mpiexec {allowrunasroot_opt} -n 1 --noprefix bash -c ""',
849
stderr=subprocess.STDOUT,
855
mpiexec = ["mpiexec", "-n", "3", noprefix_opt, allowrunasroot_opt]
857
return_code = run_test(
858
test_module, test_directory, options, launcher_cmd=mpiexec
861
return_code = run_test(
865
extra_unittest_args=["--subprocess"],
870
shutil.rmtree(tmp_dir)
872
os.environ.update(old_environ)
876
def run_doctests(test_module, test_directory, options):
878
Assumes the incoming test module is called doctest, and simply executes the
879
xdoctest runner on the torch library itself.
883
pkgpath = Path(torch.__file__).parent
885
exclude_module_list = ["torch._vendor.*"]
897
"autograd_profiler": 0,
904
if enabled["cuda"] == "auto" and torch.cuda.is_available():
905
enabled["cuda"] = True
908
enabled["cuda1"] == "auto"
909
and torch.cuda.is_available()
910
and torch.cuda.device_count() > 1
912
enabled["cuda1"] = True
914
if enabled["lapack"] == "auto" and torch._C.has_lapack:
915
enabled["lapack"] = True
917
if enabled["qengine"] == "auto":
920
import torch.ao.nn.quantized as nnq
922
torch.backends.quantized.engine = "qnnpack"
923
torch.backends.quantized.engine = "fbgemm"
924
except (ImportError, RuntimeError):
927
enabled["qengine"] = True
929
if enabled["onnx"] == "auto":
935
exclude_module_list.append("torch.onnx.*")
936
enabled["onnx"] = False
938
enabled["onnx"] = True
942
os.environ["TORCH_DOCTEST_CUDA"] = "1"
945
os.environ["TORCH_DOCTEST_CUDA1"] = "1"
947
if enabled["lapack"]:
948
os.environ["TORCH_DOCTEST_LAPACK"] = "1"
950
if enabled["qengine"]:
951
os.environ["TORCH_DOCTEST_QENGINE"] = "1"
953
if enabled["autograd_profiler"]:
954
os.environ["TORCH_DOCTEST_AUTOGRAD_PROFILER"] = "1"
956
if enabled["cpp_ext"]:
957
os.environ["TORCH_DOCTEST_CPP_EXT"] = "1"
959
if enabled["monitor"]:
960
os.environ["TORCH_DOCTEST_MONITOR"] = "1"
963
os.environ["TORCH_DOCTEST_ONNX"] = "1"
967
os.environ["TORCH_DOCTEST_QUANTIZED_DYNAMIC"] = "1"
968
os.environ["TORCH_DOCTEST_ANOMALY"] = "1"
969
os.environ["TORCH_DOCTEST_AUTOGRAD"] = "1"
970
os.environ["TORCH_DOCTEST_HUB"] = "1"
971
os.environ["TORCH_DOCTEST_DATALOADER"] = "1"
972
os.environ["TORCH_DOCTEST_FUTURES"] = "1"
974
pkgpath = os.path.dirname(torch.__file__)
977
"global_exec": r"\n".join(
979
"from torch import nn",
980
"import torch.nn.functional as F",
984
"analysis": "static",
986
"options": "+IGNORE_WHITESPACE",
988
xdoctest_verbose = max(1, options.verbose)
989
run_summary = xdoctest.runner.doctest_module(
991
config=xdoctest_config,
992
verbose=xdoctest_verbose,
993
command=options.xdoctest_command,
995
exclude=exclude_module_list,
997
result = 1 if run_summary.get("n_failed", 0) else 0
1001
def sanitize_file_name(file: str):
    """Make *file* safe to embed in a flat report file name.

    Path separators (``/`` and ``\\``) become dots and spaces become
    underscores, in a single translation pass.
    """
    translation = str.maketrans({"\\": ".", "/": ".", " ": "_"})
    return file.translate(translation)
1006
test: ShardedTest, file_path: str, failed: bool, was_rerun: bool
1009
with open(file_path, errors="ignore") as f:
1010
full_text = f.read()
1012
new_file = "test/test-reports/" + sanitize_file_name(
1013
f"{test}_{os.urandom(8).hex()}_.log"
1015
os.rename(file_path, REPO_ROOT / new_file)
1017
if not failed and not was_rerun and "=== RERUNS ===" not in full_text:
1021
f"\n{test} was successful, full logs can be found in artifacts with path {new_file}"
1023
for line in full_text.splitlines():
1024
if re.search("Running .* items in this shard:", line):
1025
print_to_stderr(line.rstrip())
1030
print_to_stderr(f"\nPRINTING LOG FILE of {test} ({new_file})")
1031
print_to_stderr(full_text)
1032
print_to_stderr(f"FINISHED PRINTING LOG FILE of {test} ({new_file})\n")
1035
def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
1036
if RERUN_DISABLED_TESTS:
1041
count = 15 if is_distributed_test or TEST_WITH_ASAN else 50
1044
rerun_options = ["--flake-finder", f"--flake-runs={count}"]
1048
rerun_options = ["-x", "--reruns=2"]
1057
pytest_args.extend(["-p", "no:xdist", "--use-pytest"])
1061
pytest_args.extend(["-n", str(NUM_PROCS)])
1066
test_report_path = get_report_path(pytest=True)
1067
pytest_args.extend(["--junit-xml-reruns", test_report_path])
1069
if options.pytest_k_expr:
1070
pytest_args.extend(["-k", options.pytest_k_expr])
1072
pytest_args.extend(rerun_options)
1076
def run_ci_sanity_check(test: ShardedTest, test_directory, options):
1078
test.name == "test_ci_sanity_check_fail"
1079
), f"This handler only works for test_ci_sanity_check_fail, got {test.name}"
1080
ret_code = run_test(test, test_directory, options, print_log=False)
1084
test_reports_dir = str(REPO_ROOT / "test/test-reports")
1086
for file in glob.glob(f"{test_reports_dir}/{test.name}*.log"):
1088
for dirname in glob.glob(f"{test_reports_dir}/**/{test.name}"):
1089
shutil.rmtree(dirname)
1094
"test_cuda_primary_ctx": run_test_with_subprocess,
1095
"test_cuda_nvml_based_avail": run_test_with_subprocess,
1096
"test_cuda_trace": run_test_with_subprocess,
1097
"test_cpp_extensions_aot_no_ninja": test_cpp_extensions_aot_no_ninja,
1098
"test_cpp_extensions_aot_ninja": test_cpp_extensions_aot_ninja,
1099
"distributed/test_distributed_spawn": test_distributed,
1100
"distributed/algorithms/quantization/test_quantization": test_distributed,
1101
"distributed/test_c10d_nccl": run_test_with_subprocess,
1102
"distributed/test_c10d_gloo": run_test_with_subprocess,
1103
"distributed/test_c10d_ucc": run_test_with_subprocess,
1104
"distributed/test_c10d_common": run_test_with_subprocess,
1105
"distributed/test_c10d_spawn_gloo": run_test_with_subprocess,
1106
"distributed/test_c10d_spawn_nccl": run_test_with_subprocess,
1107
"distributed/test_c10d_spawn_ucc": run_test_with_subprocess,
1108
"distributed/test_store": run_test_with_subprocess,
1109
"distributed/test_pg_wrapper": run_test_with_subprocess,
1110
"distributed/rpc/test_faulty_agent": run_test_with_subprocess,
1111
"distributed/rpc/test_tensorpipe_agent": run_test_with_subprocess,
1112
"distributed/rpc/test_share_memory": run_test_with_subprocess,
1113
"distributed/rpc/cuda/test_tensorpipe_agent": run_test_with_subprocess,
1114
"doctests": run_doctests,
1115
"test_ci_sanity_check_fail": run_ci_sanity_check,
1116
"test_autoload_enable": test_autoload_enable,
1117
"test_autoload_disable": test_autoload_disable,
1121
PYTEST_SKIP_RETRIES = {"test_public_bindings"}
1125
parser = argparse.ArgumentParser(
1126
description="Run the PyTorch unit test suite",
1127
epilog="where TESTS is any of: {}".format(", ".join(TESTS)),
1128
formatter_class=argparse.RawTextHelpFormatter,
1130
parser.add_argument(
1135
help="Print verbose information and test-by-test results",
1137
if sys.version_info >= (3, 9):
1138
parser.add_argument(
1140
action=argparse.BooleanOptionalAction,
1141
default=strtobool(os.environ.get("TEST_SHOWLOCALS", "False")),
1142
help="Show local variables in tracebacks (default: True)",
1145
parser.add_argument(
1147
action="store_true",
1148
default=strtobool(os.environ.get("TEST_SHOWLOCALS", "False")),
1149
help="Show local variables in tracebacks (default: True)",
1151
parser.add_argument("--no-showlocals", dest="showlocals", action="store_false")
1152
parser.add_argument("--jit", "--jit", action="store_true", help="run all jit tests")
1153
parser.add_argument(
1154
"--distributed-tests",
1155
"--distributed-tests",
1156
action="store_true",
1157
help="Run all distributed tests",
1159
parser.add_argument(
1162
action="store_true",
1164
"If this flag is present, we will only run functorch tests. "
1165
"If this flag is not present, we will run all tests "
1166
"(including functorch tests)."
1169
parser.add_argument(
1172
action="store_true",
1173
help=("If this flag is present, we will only run test_mps and test_metal"),
1175
parser.add_argument(
1178
action="store_true",
1179
help=("If this flag is present, we will run xpu tests except XPU_BLOCK_LIST"),
1181
parser.add_argument(
1184
action="store_true",
1185
help=("If this flag is present, we will only run C++ tests"),
1187
parser.add_argument(
1190
action="store_true",
1191
help="Only run core tests, or tests that validate PyTorch's ops, modules,"
1192
"and autograd. They are defined by CORE_TEST_LIST.",
1194
parser.add_argument(
1197
action="store_true",
1199
"Only run ONNX tests, or tests that validate PyTorch's ONNX export. "
1200
"If this flag is not present, we will exclude ONNX tests."
1203
parser.add_argument(
1207
help="Pass to pytest as its -k expr argument",
1209
parser.add_argument(
1212
action="store_true",
1213
help="enable coverage",
1214
default=PYTORCH_COLLECT_COVERAGE,
1216
parser.add_argument(
1220
choices=TestChoices(TESTS),
1223
help="select a set of tests to include (defaults to ALL tests)."
1224
" tests must be a part of the TESTS list defined in run_test.py",
1226
parser.add_argument(
1233
help="select a set of tests to exclude",
1235
parser.add_argument(
1236
"--ignore-win-blocklist",
1237
action="store_true",
1238
help="always run blocklisted windows tests",
1245
parser.add_argument(
1246
"--continue-through-error",
1248
action="store_true",
1249
help="Runs the full test suite despite one of the tests failing",
1250
default=strtobool(os.environ.get("CONTINUE_THROUGH_ERROR", "False")),
1252
parser.add_argument(
1254
action="store_true",
1255
help="Print logs to output file while running tests. True if in CI and env var is not set",
1256
default=IS_CI and not strtobool(os.environ.get("VERBOSE_TEST_LOGS", "False")),
1258
parser.add_argument(
1260
action="store_true",
1261
help="Set a timeout based on the test times json file. Only works if there are test times available",
1262
default=IS_CI and not strtobool(os.environ.get("NO_TEST_TIMEOUT", "False")),
1264
parser.add_argument(
1266
action="store_true",
1267
help="Enables removing tests based on TD",
1272
or (TEST_CONFIG == "distributed" and TEST_CUDA)
1273
or (IS_WINDOWS and not TEST_CUDA)
1274
or TEST_CONFIG == "nogpu_AVX512"
1275
or TEST_CONFIG == "nogpu_NO_AVX2"
1276
or TEST_CONFIG == "default"
1278
and get_pr_number() is not None
1279
and not strtobool(os.environ.get("NO_TD", "False"))
1280
and not TEST_WITH_ROCM
1282
and "xpu" not in BUILD_ENVIRONMENT
1283
and "onnx" not in BUILD_ENVIRONMENT
1284
and os.environ.get("GITHUB_WORKFLOW", "slow") in ("trunk", "pull"),
1286
parser.add_argument(
1290
help="runs a shard of the tests (taking into account other selections), e.g., "
1291
"--shard 2 3 will break up the selected tests into 3 shards and run the tests "
1292
"in the 2nd shard (the first number should not exceed the second)",
1294
parser.add_argument(
1295
"--exclude-jit-executor",
1296
action="store_true",
1297
help="exclude tests that are run for a specific jit config",
1299
parser.add_argument(
1300
"--exclude-torch-export-tests",
1301
action="store_true",
1302
help="exclude torch export tests",
1304
parser.add_argument(
1305
"--exclude-distributed-tests",
1306
action="store_true",
1307
help="exclude distributed tests",
1309
parser.add_argument(
1310
"--exclude-inductor-tests",
1311
action="store_true",
1312
help="exclude inductor tests",
1314
parser.add_argument(
1316
action="store_true",
1317
help="Only list the test that will run.",
1319
parser.add_argument(
1320
"--xdoctest-command",
1323
"Control the specific doctest action. "
1324
"Use 'list' to simply parse doctests and check syntax. "
1325
"Use 'all' to execute all doctests or specify a specific "
1329
parser.add_argument(
1330
"--no-translation-validation",
1331
action="store_false",
1332
help="Run tests without translation validation.",
1335
group = parser.add_mutually_exclusive_group()
1338
action="store_true",
1339
help="Run tests with TorchDynamo+EagerBackend turned on",
1343
action="store_true",
1344
help="Run tests with TorchInductor turned on",
1347
args, extra = parser.parse_known_args()
1350
args.additional_args = extra
1355
exclude_list, selected_tests, exclude_message=None, exact_match=False
1357
for exclude_test in exclude_list:
1358
tests_copy = selected_tests[:]
1359
for test in tests_copy:
1361
not exact_match and test.startswith(exclude_test)
1362
) or test == exclude_test:
1363
if exclude_message is not None:
1364
print_to_stderr(f"Excluding {test} {exclude_message}")
1365
selected_tests.remove(test)
1366
return selected_tests
1369
def must_serial(file: Union[str, ShardedTest]) -> bool:
1370
if isinstance(file, ShardedTest):
1373
os.getenv("PYTORCH_TEST_RUN_EVERYTHING_IN_SERIAL", "0") == "1"
1374
or DISTRIBUTED_TEST_PREFIX in os.getenv("TEST_CONFIG", "")
1375
or DISTRIBUTED_TEST_PREFIX in file
1376
or file in CUSTOM_HANDLERS
1377
or file in RUN_PARALLEL_BLOCKLIST
1378
or file in CI_SERIAL_LIST
1379
or file in JIT_EXECUTOR_TESTS
1380
or file in ONNX_SERIAL_LIST
1385
def can_run_in_pytest(test):
    """Whether this test file may be executed through pytest.

    Controlled solely by the PYTORCH_TEST_DO_NOT_USE_PYTEST environment
    variable (unset or "0" means pytest is allowed); the ``test`` argument
    is currently unused but kept for interface stability.
    """
    opt_out = os.getenv("PYTORCH_TEST_DO_NOT_USE_PYTEST", "0")
    return opt_out == "0"
1389
def get_selected_tests(options) -> List[str]:
1390
selected_tests = options.include
1394
selected_tests = list(
1395
filter(lambda test_name: "jit" in test_name, selected_tests)
1398
if options.distributed_tests:
1399
selected_tests = list(
1400
filter(lambda test_name: test_name in DISTRIBUTED_TESTS, selected_tests)
1405
selected_tests = list(
1406
filter(lambda test_name: test_name in CORE_TEST_LIST, selected_tests)
1410
if options.functorch:
1411
selected_tests = [tname for tname in selected_tests if tname in FUNCTORCH_TESTS]
1414
selected_tests = [tname for tname in selected_tests if tname in CPP_TESTS]
1418
options.exclude.extend(CPP_TESTS)
1421
selected_tests = ["test_mps", "test_metal", "test_modules", "test_nn"]
1424
options.exclude.extend(["test_mps", "test_metal"])
1427
selected_tests = exclude_tests(XPU_BLOCKLIST, selected_tests, "on XPU")
1430
options.exclude.extend(XPU_TEST)
1433
onnx_tests = [tname for tname in selected_tests if tname in ONNX_TESTS]
1435
selected_tests = onnx_tests
1438
options.exclude.extend(onnx_tests)
1441
if options.exclude_jit_executor:
1442
options.exclude.extend(JIT_EXECUTOR_TESTS)
1444
if options.exclude_distributed_tests:
1445
options.exclude.extend(DISTRIBUTED_TESTS)
1447
if options.exclude_inductor_tests:
1448
options.exclude.extend(INDUCTOR_TESTS)
1450
if options.exclude_torch_export_tests:
1451
options.exclude.extend(TORCH_EXPORT_TESTS)
1454
if torch.version.cuda is not None:
1455
options.exclude.extend(["distributions/test_constraints"])
1458
if sys.version_info >= (3, 12):
1459
options.exclude.extend(
1461
"functorch/test_dims",
1462
"functorch/test_rearrange",
1463
"functorch/test_parsing",
1464
"functorch/test_memory_efficient_fusion",
1465
"torch_np/numpy_tests/core/test_multiarray",
1469
selected_tests = exclude_tests(options.exclude, selected_tests)
1471
if sys.platform == "win32" and not options.ignore_win_blocklist:
1472
target_arch = os.environ.get("VSCMD_ARG_TGT_ARCH")
1473
if target_arch != "x64":
1474
WINDOWS_BLOCKLIST.append("cpp_extensions_aot_no_ninja")
1475
WINDOWS_BLOCKLIST.append("cpp_extensions_aot_ninja")
1476
WINDOWS_BLOCKLIST.append("cpp_extensions_jit")
1477
WINDOWS_BLOCKLIST.append("jit")
1478
WINDOWS_BLOCKLIST.append("jit_fuser")
1480
selected_tests = exclude_tests(WINDOWS_BLOCKLIST, selected_tests, "on Windows")
1482
elif TEST_WITH_ROCM:
1483
selected_tests = exclude_tests(ROCM_BLOCKLIST, selected_tests, "on ROCm")
1486
if not dist.is_available():
1487
selected_tests = exclude_tests(
1490
"PyTorch is built without distributed support.",
1494
if not torch._C.has_lapack:
1495
selected_tests = exclude_tests(
1496
TESTS_REQUIRING_LAPACK,
1498
"PyTorch is built without LAPACK support.",
1501
if TEST_WITH_SLOW_GRADCHECK:
1502
selected_tests = exclude_tests(
1503
TESTS_NOT_USING_GRADCHECK,
1505
"Running in slow gradcheck mode, skipping tests "
1506
"that don't use gradcheck.",
1510
selected_tests = [parse_test_module(x) for x in selected_tests]
1511
return selected_tests
1514
def load_test_times_from_file(file: str) -> Dict[str, Any]:
1516
path = os.path.join(str(REPO_ROOT), file)
1517
if not os.path.exists(path):
1519
f"::warning:: Failed to find test times file `{path}`. Using round robin sharding."
1523
with open(path) as f:
1524
test_times_file = cast(Dict[str, Any], json.load(f))
1525
build_environment = os.environ.get("BUILD_ENVIRONMENT")
1526
test_config = os.environ.get("TEST_CONFIG")
1527
if test_config in test_times_file.get(build_environment, {}):
1528
print_to_stderr("Found test times from artifacts")
1529
return test_times_file[build_environment][test_config]
1530
elif test_config in test_times_file["default"]:
1532
f"::warning:: Gathered no stats from artifacts for {build_environment} build env"
1533
f" and {test_config} test config. Using default build env and {test_config} test config instead."
1535
return test_times_file["default"][test_config]
1538
f"::warning:: Gathered no stats from artifacts for build env {build_environment} build env"
1539
f" and {test_config} test config. Using default build env and default test config instead."
1541
return test_times_file["default"]["default"]
1544
def load_test_file_times(
    file: str = ADDITIONAL_CI_FILES_FOLDER / TEST_TIMES_FILE,
) -> Dict[str, float]:
    """Load the per-test-file duration estimates used for sharding.

    Thin typed wrapper around ``load_test_times_from_file``: the returned
    mapping is test file name -> estimated runtime (seconds).
    """
    times = load_test_times_from_file(file)
    return cast(Dict[str, float], times)
1550
def load_test_class_times(
    file: str = ADDITIONAL_CI_FILES_FOLDER / TEST_CLASS_TIMES_FILE,
) -> Dict[str, Dict[str, float]]:
    """Load the per-test-class duration estimates used for class-level sharding.

    Thin typed wrapper around ``load_test_times_from_file``: the returned
    mapping is test file name -> {test class name -> estimated seconds}.
    """
    class_times = load_test_times_from_file(file)
    return cast(Dict[str, Dict[str, float]], class_times)
1556
def get_sharding_opts(options) -> Tuple[int, int]:
    """Return the (which_shard, num_shards) pair selected on the command line.

    Defaults to (1, 1) — a single shard containing every test — when no
    --shard option was given.  When a shard pair is supplied it is
    validated: exactly two entries, both positive, and the selected shard
    index must not exceed the total number of shards.
    """
    which_shard, num_shards = 1, 1
    if options.shard:
        assert len(options.shard) == 2, "Unexpected shard format"
        assert min(options.shard) > 0, "Shards must be positive numbers"
        which_shard, num_shards = options.shard
        assert (
            which_shard <= num_shards
        ), "Selected shard must be less than or equal to total number of shards"
    return (which_shard, num_shards)
1571
selected_tests: Sequence[TestRun],
1572
test_file_times: Dict[str, float],
1573
test_class_times: Dict[str, Dict[str, float]],
1574
sort_by_time: bool = True,
1575
) -> Tuple[float, List[ShardedTest]]:
1576
which_shard, num_shards = get_sharding_opts(options)
1579
shards = calculate_shards(
1583
test_class_times=test_class_times,
1584
must_serial=must_serial,
1585
sort_by_time=sort_by_time,
1587
return shards[which_shard - 1]
1590
class TestFailure(NamedTuple):
1596
test: ShardedTest, test_directory: str, options
1597
) -> Optional[TestFailure]:
1599
maybe_set_hip_visible_devies()
1601
test_name = test.name
1604
print_to_stderr(f"Running {str(test)} ... [{datetime.now()}]")
1605
handler = CUSTOM_HANDLERS.get(test_name, run_test)
1606
return_code = handler(test, test_directory, options)
1607
assert isinstance(return_code, int) and not isinstance(
1609
), f"While running {str(test)} got non integer return code {return_code}"
1610
if return_code == 0:
1613
message = f"{str(test)} failed!"
1617
signal_name = SIGNALS_TO_NAMES_DICT[-return_code]
1618
message += f" Received signal: {signal_name}"
1619
return TestFailure(test.test, message)
1620
except Exception as e:
1621
return TestFailure(test.test, f"{str(test)} failed! {e}")
1625
selected_tests: List[ShardedTest],
1626
test_directory: str,
1628
failures: List[TestFailure],
1630
if len(selected_tests) == 0:
1635
selected_tests_parallel = [x for x in selected_tests if not must_serial(x)]
1636
selected_tests_serial = [
1637
x for x in selected_tests if x not in selected_tests_parallel
1641
pool = get_context("spawn").Pool(
1642
NUM_PROCS, maxtasksperchild=None if torch.version.hip else 1
1650
"pytest_shard_custom.py",
1652
for conftest_file in conftest_files:
1653
cpp_file = os.path.join(CPP_TESTS_DIR, conftest_file)
1656
and os.path.exists(CPP_TESTS_DIR)
1657
and os.path.isdir(CPP_TESTS_DIR)
1658
and not os.path.exists(cpp_file)
1660
shutil.copy(os.path.join(test_directory, conftest_file), cpp_file)
1662
def handle_error_messages(failure: Optional[TestFailure]):
1665
failures.append(failure)
1666
print_to_stderr(failure.message)
1669
def parallel_test_completion_callback(failure):
1670
test_failed = handle_error_messages(failure)
1673
and not options.continue_through_error
1674
and not RERUN_DISABLED_TESTS
1678
keep_going_message = (
1679
"\n\nTip: You can keep running tests even on failure by passing --keep-going to run_test.py.\n"
1680
"If running on CI, add the 'keep-going' label to your PR and rerun your jobs."
1684
for test in selected_tests_serial:
1685
options_clone = copy.deepcopy(options)
1686
if can_run_in_pytest(test):
1687
options_clone.pytest = True
1688
failure = run_test_module(test, test_directory, options_clone)
1689
test_failed = handle_error_messages(failure)
1692
and not options.continue_through_error
1693
and not RERUN_DISABLED_TESTS
1695
raise RuntimeError(failure.message + keep_going_message)
1698
for test in selected_tests_parallel:
1699
options_clone = copy.deepcopy(options)
1700
if can_run_in_pytest(test):
1701
options_clone.pytest = True
1702
options_clone.additional_args.extend(["-m", "serial"])
1703
failure = run_test_module(test, test_directory, options_clone)
1704
test_failed = handle_error_messages(failure)
1707
and not options.continue_through_error
1708
and not RERUN_DISABLED_TESTS
1710
raise RuntimeError(failure.message + keep_going_message)
1712
os.environ["NUM_PARALLEL_PROCS"] = str(NUM_PROCS)
1713
for test in selected_tests_parallel:
1714
options_clone = copy.deepcopy(options)
1715
if can_run_in_pytest(test):
1716
options_clone.pytest = True
1717
options_clone.additional_args.extend(["-m", "not serial"])
1720
args=(test, test_directory, options_clone),
1721
callback=parallel_test_completion_callback,
1725
del os.environ["NUM_PARALLEL_PROCS"]
1734
def check_pip_packages() -> None:
1736
"pytest-rerunfailures",
1737
"pytest-flakefinder",
1740
installed_packages = [i.key for i in pkg_resources.working_set]
1741
for package in packages:
1742
if package not in installed_packages:
1744
f"Missing pip dependency: {package}, please run `pip install -r .ci/docker/requirements-ci.txt`"
1750
check_pip_packages()
1752
options = parse_args()
1755
which_shard, num_shards = get_sharding_opts(options)
1756
add_global_metric("shard", which_shard)
1757
add_global_metric("num_shards", num_shards)
1759
test_directory = str(REPO_ROOT / "test")
1760
selected_tests = get_selected_tests(options)
1762
test_prioritizations = import_results()
1763
test_prioritizations.amend_tests(selected_tests)
1765
os.makedirs(REPO_ROOT / "test" / "test-reports", exist_ok=True)
1767
if options.coverage and not PYTORCH_COLLECT_COVERAGE:
1768
shell(["coverage", "erase"])
1772
get_test_case_configs(dirpath=test_directory)
1774
test_file_times_dict = load_test_file_times()
1775
test_class_times_dict = load_test_class_times()
1778
"""Defines a set of tests with similar priority that should be run together on the current shard"""
1781
sharded_tests: List[ShardedTest]
1782
failures: List[TestFailure]
1785
self, name: str, raw_tests: Sequence[TestRun], should_sort_shard: bool
1789
self.time, self.sharded_tests = do_sharding(
1792
test_file_times_dict,
1793
test_class_times_dict,
1794
sort_by_time=should_sort_shard,
1798
s = f"Name: {self.name} (est. time: {round(self.time / 60, 2)}min)\n"
1799
serial = [test for test in self.sharded_tests if must_serial(test)]
1800
parallel = [test for test in self.sharded_tests if not must_serial(test)]
1801
s += f" Serial tests ({len(serial)}):\n"
1802
s += "".join(f" {test}\n" for test in serial)
1803
s += f" Parallel tests ({len(parallel)}):\n"
1804
s += "".join(f" {test}\n" for test in parallel)
1807
percent_to_run = 25 if options.enable_td else 100
1809
f"Running {percent_to_run}% of tests based on TD"
1810
if options.enable_td
1811
else "Running all tests"
1813
include, exclude = test_prioritizations.get_top_per_tests(percent_to_run)
1815
test_batch = TestBatch("tests to run", include, False)
1816
test_batch_exclude = TestBatch("excluded", exclude, True)
1818
gen_ci_artifact([x.to_json() for x in include], [x.to_json() for x in exclude])
1820
print_to_stderr(f"Running parallel tests on {NUM_PROCS} processes")
1821
print_to_stderr(test_batch)
1822
print_to_stderr(test_batch_exclude)
1828
os.environ["PYTORCH_TEST_WITH_DYNAMO"] = "1"
1830
elif options.inductor:
1831
os.environ["PYTORCH_TEST_WITH_INDUCTOR"] = "1"
1833
if not options.no_translation_validation:
1834
os.environ["PYTORCH_TEST_WITH_TV"] = "1"
1838
start_time = time.time()
1840
test_batch.sharded_tests, test_directory, options, test_batch.failures
1842
elapsed_time = time.time() - start_time
1844
f"Running test batch '{test_batch.name}' cost {round(elapsed_time, 2)} seconds"
1848
if options.coverage:
1849
from coverage import Coverage
1851
with set_cwd(test_directory):
1853
if PYTORCH_COLLECT_COVERAGE:
1855
cov.combine(strict=False)
1857
if not PYTORCH_COLLECT_COVERAGE:
1860
all_failures = test_batch.failures
1863
for test, _ in all_failures:
1864
test_stats = test_prioritizations.get_test_stats(test)
1865
print_to_stderr("Emiting td_test_failure_stats_v2")
1867
"td_test_failure_stats_v2",
1869
"selected_tests": selected_tests,
1870
"failure": str(test),
1874
gen_additional_test_failures_file(
1875
[test.test_file for test, _ in all_failures]
1878
if len(all_failures):
1879
for _, err in all_failures:
1880
print_to_stderr(err)
1883
if not RERUN_DISABLED_TESTS:
1887
if __name__ == "__main__":