---
# Some models have large dataset that doesn't fit in memory. Lower the batch
# size to test the accuracy.
batch_size:
  training:
    demucs: 4
    dlrm: 1024
    densenet121: 4
    hf_Reformer: 4
    hf_T5_base: 4
    timm_efficientdet: 1
    llama_v2_7b_16h: 1
    # reduced from 16 due to cudagraphs OOM in TorchInductor dashboard
    yolov3: 8

  inference:
    timm_efficientdet: 32


# Models whose batch size must never be altered by the harness.
dont_change_batch_size:
  - demucs
  - pytorch_struct
  - pyhpc_turbulent_kinetic_energy
  # https://github.com/pytorch/benchmark/pull/1656
  - vision_maskrcnn


# Accuracy-comparison tolerances, grouped by how loose the check must be.
tolerance:
  # Need lower tolerance on GPU. GPU kernels have non-deterministic kernels for these models.
  higher:
    - alexnet
    - attention_is_all_you_need_pytorch
    - densenet121
    - hf_Albert
    - vgg16
    - mobilenet_v3_large
    - nvidia_deeprecommender
    - timm_efficientdet

  # These models need >1e-3 tolerance
  even_higher:
    - soft_actor_critic
    - tacotron2

  higher_fp16:
    - doctr_reco_predictor
    - drq
    - hf_Whisper

  higher_bf16:
    - doctr_reco_predictor
    - drq
    - hf_Whisper

  # Models compared with cosine similarity instead of elementwise tolerance.
  cosine: []


# These benchmarks took >600s on an i9-11900K CPU
very_slow: &VERY_SLOW_MODELS
  # 3339s
  - hf_BigBird
  # 3062s
  - hf_Longformer
  # 930s
  - hf_T5


# These benchmarks took >60s on an i9-11900K CPU
# (the alias pulls in every very_slow model as well)
slow:
  - *VERY_SLOW_MODELS
  # 137s
  - BERT_pytorch
  # 116s
  - demucs
  # 242s
  - fastNLP_Bert
  # 221s
  - hf_Albert
  # 400s
  - hf_Bart
  # 334s
  - hf_Bert
  # 187s
  - hf_DistilBert
  # 470s
  - hf_GPT2
  # 141s
  - hf_Reformer
  # 317s
  - speech_transformer
  # 99s
  - vision_maskrcnn

non_deterministic:
  # https://github.com/pytorch/pytorch/issues/98355
  - mobilenet_v3_large

# Precision overrides applied when running under reduced-precision configs.
dtype:
  force_amp_for_fp16_bf16_models:
    - DALLE2_pytorch
    - doctr_det_predictor
    - doctr_reco_predictor
    - Super_SloMo
    - tts_angular
    - pyhpc_turbulent_kinetic_energy
    - detectron2_fcos_r_50_fpn

  force_fp16_for_bf16_models:
    - vision_maskrcnn

# models in canary_models that we should run anyway
canary_models:
  - torchrec_dlrm

# Anchor reused below (only_training, skip.test.training).
detectron2_models: &DETECTRON2_MODELS
  - detectron2_fasterrcnn_r_101_c4
  - detectron2_fasterrcnn_r_101_dc5
  - detectron2_fasterrcnn_r_101_fpn
  - detectron2_fasterrcnn_r_50_c4
  - detectron2_fasterrcnn_r_50_dc5
  - detectron2_fasterrcnn_r_50_fpn
  - detectron2_maskrcnn_r_101_c4
  - detectron2_maskrcnn_r_101_fpn
  - detectron2_maskrcnn_r_50_fpn

# These models support only train mode. So accuracy checking can't be done in
# eval mode.
only_training:
  - *DETECTRON2_MODELS
  - tts_angular
  - tacotron2
  - demucs
  - hf_Reformer
  - pytorch_struct
  - yolov3

# Models not yet supported on the TensorRT backend.
trt_not_yet_working:
  - alexnet
  - resnet18
  - resnet50
  - mobilenet_v2
  - mnasnet1_0
  - squeezenet1_1
  - shufflenetv2_x1_0
  - vgg16
  - resnext50_32x4d

# Models excluded from runs; sub-keys scope the exclusion (everywhere, per
# device, per test mode, etc.).
skip:
  all:
    # OOMs (A100 40G)
    - detectron2_maskrcnn
    # TIMEOUT, https://github.com/pytorch/pytorch/issues/98467
    - tacotron2
    # Failing in eager mode
    - hf_clip
    # multi gpu not always available in benchmark runners
    - simple_gpt_tp_manual

  device:
    cpu:
      # OOMs
      - hf_T5_generate
      # model is CUDA only
      - cm3leon_generate
      # timeout
      - nanogpt
      # timeout
      - sam
      # model is CUDA only
      - llama_v2_7b_16h
      # flaky
      - stable_diffusion
      # requires FBGEMM, CUDA only
      - torchrec_dlrm
      - simple_gpt
      # works on cuda, accuracy failure on cpu
      - hf_Whisper
      - stable_diffusion_text_encoder

    cuda: []

  test:
    training:
      - *DETECTRON2_MODELS
      # not designed for training
      - pyhpc_equation_of_state
      - pyhpc_isoneutral_mixing
      - pyhpc_turbulent_kinetic_energy
      - maml
      - llama
      - llama_v2_7b_16h
      - simple_gpt
      # Model's DEFAULT_TRAIN_BSIZE is not implemented
      - cm3leon_generate
      - hf_T5_generate
      - doctr_det_predictor
      - doctr_reco_predictor
      # doesn't fit in memory
      - phi_1_5
      - detectron2_fcos_r_50_fpn

    control_flow:
      - cm3leon_generate
      - detectron2_fcos_r_50_fpn
      - fastNLP_Bert
      - hf_Longformer
      - hf_Reformer
      - hf_T5_generate
      - opacus_cifar10
      - speech_transformer

  # Models that should only run in --multiprocess mode
  multiprocess:
    - simple_gpt

  # for these models, conv-batchnorm fusing causes big numerical churn.
  # Skip them
  freezing:
    - mnasnet1_0
    - moco
    - shufflenet_v2_x1_0

# Settings that apply only to accuracy-checking runs.
accuracy:
  skip:
    large_models:
      # Models too large to have eager, dynamo and fp64_numbers simultaneously
      # even for 40 GB machine. We have tested accuracy for smaller version of
      # these models
      - hf_GPT2_large
      - hf_T5_large
      - timm_vision_transformer_large
      # accuracy https://github.com/pytorch/pytorch/issues/93847
      - maml
      - llama_v2_7b_16h
      - Background_Matting
      - stable_diffusion_unet
    eager_not_deterministic:
      # Models that deterministic algorithms can not be turned on for eager mode.
      - Background_Matting

  max_batch_size:
    hf_GPT2: 2
    pytorch_unet: 2