# CSS-LM — PyTorch benchmark module (scraped file-listing header removed: "265 строк · 10.2 Кб" = "265 lines · 10.2 KB")
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Benchmarking the library on inference and training in PyTorch.
"""
import logging
import timeit
from typing import Callable, Optional, Tuple

from transformers import (
    MODEL_MAPPING,
    MODEL_WITH_LM_HEAD_MAPPING,
    PretrainedConfig,
    is_py3nvml_available,
    is_torch_available,
)

from .benchmark_utils import (
    Benchmark,
    Memory,
    MemorySummary,
    measure_peak_memory_cpu,
    start_memory_tracing,
    stop_memory_tracing,
)
41
42
# torch and the PyTorch-specific argument class are only importable when
# torch is installed; guard so the module can still be imported without it.
if is_torch_available():
    import torch
    from .benchmark_args import PyTorchBenchmarkArguments


# py3nvml is optional and only needed for GPU memory measurement.
if is_py3nvml_available():
    import py3nvml.py3nvml as nvml


logger = logging.getLogger(__name__)
54
class PyTorchBenchmark(Benchmark):
    """Benchmark PyTorch models on inference and training.

    For every (model, batch_size, sequence_length) combination the
    ``Benchmark`` driver invokes the ``_inference_*`` / ``_train_*`` hooks
    below.  Each hook builds a model plus a random input batch and measures
    either speed (via ``timeit``) or peak memory (via nvml on GPU, process
    polling on CPU, or line-by-line tracing when requested).
    """

    # NOTE(review): these class-level annotations are presumably populated by
    # the `Benchmark` base-class constructor — confirm against benchmark_utils.
    args: PyTorchBenchmarkArguments
    configs: PretrainedConfig
    framework: str = "PyTorch"

    @property
    def framework_version(self):
        """Version string of the installed torch package."""
        return torch.__version__

    def _inference_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        """Time one no-grad forward pass; seconds per run, or "N/A" on OOM."""
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_inference)

    def _inference_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        """Measure peak memory of one no-grad forward pass."""
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_inference)

    def _train_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        """Time one forward+backward pass; seconds per run, or "N/A" on OOM."""
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_train)

    def _train_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        """Measure peak memory of one forward+backward pass."""
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_train)

    def _load_model(self, config, default_mapping):
        """Instantiate the model to benchmark for ``config``.

        Prefers the concrete class named in ``config.architectures`` so the
        exact pretrained architecture is benchmarked; falls back to the auto
        mapping ``default_mapping`` when no architecture is recorded or when
        ``args.only_pretrain_model`` is set.

        Raises:
            ImportError: if the named architecture class cannot be imported.
        """
        has_model_class_in_config = (
            hasattr(config, "architectures")
            and isinstance(config.architectures, list)
            and len(config.architectures) > 0
        )
        if not self.args.only_pretrain_model and has_model_class_in_config:
            try:
                model_class = config.architectures[0]
                transformers_module = __import__("transformers", fromlist=[model_class])
                model_cls = getattr(transformers_module, model_class)
                return model_cls(config)
            except ImportError:
                raise ImportError(
                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
                )
        return default_mapping[config.__class__](config)

    def _random_input_ids(self, config, batch_size: int, sequence_length: int):
        """Build a random token-id batch of shape (batch, seq) on the device."""
        # encoder-decoder has vocab size saved differently
        vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
        return torch.randint(
            vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
        )

    def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        """Return a zero-argument callable that runs one no-grad forward pass."""
        config = self.config_dict[model_name]

        # must be set before the model is constructed for tracing to work
        if self.args.torchscript:
            config.torchscript = True

        model = self._load_model(config, MODEL_MAPPING)
        model.eval()
        model.to(self.args.device)

        input_ids = self._random_input_ids(config, batch_size, sequence_length)

        if self.args.fp16:
            # fixed: original message incorrectly said "training" here
            logger.info("Running inference in Mixed Precision...")
            assert self.args.is_gpu, "Mixed precision is possible only for GPU."
            # amp seems to have memory leaks so that memory usage
            # is measured using .half() for now https://github.com/NVIDIA/apex/issues/439
            model.half()

        if self.args.torchscript:
            with torch.no_grad():
                inference_model = torch.jit.trace(model, input_ids)
        else:
            inference_model = model

        def encoder_decoder_forward():
            with torch.no_grad():
                outputs = inference_model(input_ids, decoder_input_ids=input_ids)
            return outputs

        def encoder_forward():
            with torch.no_grad():
                outputs = inference_model(input_ids)
            return outputs

        _forward = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward
        return _forward

    def _prepare_train_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        """Return a zero-argument callable that runs one forward+backward pass."""
        config = self.config_dict[model_name]

        model = self._load_model(config, MODEL_WITH_LM_HEAD_MAPPING)

        if self.args.torchscript:
            raise NotImplementedError("Training for torchscript is currently not implemented")
        else:
            train_model = model

        model.train()
        model.to(self.args.device)

        input_ids = self._random_input_ids(config, batch_size, sequence_length)

        if self.args.fp16:
            logger.info("Running training in Mixed Precision...")
            assert self.args.is_gpu, "Mixed precision is possible only for GPU."

            # amp seems to have memory leaks so that memory usage
            # is measured using .half() for now https://github.com/NVIDIA/apex/issues/439
            model.half()

        def compute_loss_and_backprop_encoder():
            # models return loss as the first tuple element when `labels` is given
            loss = train_model(input_ids, labels=input_ids)[0]
            loss.backward()
            return loss

        def compute_loss_and_backprop_encoder_decoder():
            loss = train_model(input_ids, decoder_input_ids=input_ids, labels=input_ids)[0]
            loss.backward()
            return loss

        _train = (
            compute_loss_and_backprop_encoder_decoder
            if config.is_encoder_decoder
            else compute_loss_and_backprop_encoder
        )
        return _train

    def _measure_speed(self, func) -> float:
        """Return the best per-call runtime of ``func`` in seconds.

        Returns the string "N/A" instead when the model does not fit on the
        GPU (a ``RuntimeError`` is raised by torch in that case).
        """
        try:
            if self.args.is_tpu or self.args.torchscript:
                # run additional 10 times to stabilize compilation for tpu and torchscript
                logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
                timeit.repeat(
                    func, repeat=1, number=5,
                )

            # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)

            if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
                import torch_xla.debug.metrics as met

                self.print_fn(met.metrics_report())

            # each repeat ran `func` 10 times, so divide to get per-call time
            return min(runtimes) / 10.0
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A"

    def _measure_memory(self, func: Callable[[], None]) -> Tuple[Memory, Optional[MemorySummary]]:
        """Run ``func`` once and return (peak memory, optional line-by-line summary).

        GPU: total device memory in use after the call, read via nvml (so
        concurrent processes on the same GPU inflate the number — see the
        logged warning).  CPU: peak RSS of the process.  TPU: unsupported.
        Returns ("N/A", None) when the model does not fit on the GPU.
        """
        try:
            if self.args.trace_memory_line_by_line:
                trace = start_memory_tracing("transformers")

            if self.args.is_tpu:
                # tpu
                raise NotImplementedError(
                    "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `--no_memory` or `args.no_memory=True`"
                )
            elif self.args.is_gpu:
                if not is_py3nvml_available():
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    memory = "N/A"
                else:
                    logger.info(
                        "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU."
                    )
                    # init nvml
                    nvml.nvmlInit()
                    func()
                    handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                    meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                    max_bytes_in_use = meminfo.used
                    memory = Memory(max_bytes_in_use)
                    # shutdown nvml
                    nvml.nvmlShutdown()
            else:
                # cpu
                memory_bytes = measure_peak_memory_cpu(func)
                memory = Memory(memory_bytes) if isinstance(memory_bytes, int) else memory_bytes

            if self.args.trace_memory_line_by_line:
                summary = stop_memory_tracing(trace)
            else:
                summary = None

            return memory, summary
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A", None