# CSS-LM — PyTorch benchmark module (scraped file-listing header removed: "265 строк · 10.2 Кб" = "265 lines · 10.2 KB")
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Benchmarking the library on inference and training in PyTorch.
"""
import logging
import timeit
from typing import Callable, Optional, Tuple

from transformers import (
    MODEL_MAPPING,
    MODEL_WITH_LM_HEAD_MAPPING,
    PretrainedConfig,
    is_py3nvml_available,
    is_torch_available,
)

from .benchmark_utils import (
    Benchmark,
    Memory,
    MemorySummary,
    measure_peak_memory_cpu,
    start_memory_tracing,
    stop_memory_tracing,
)
41
42
# torch and the PyTorch-specific argument class are only importable when
# torch is installed; guard so the module can still be imported without it.
if is_torch_available():
    import torch
    from .benchmark_args import PyTorchBenchmarkArguments


# py3nvml is optional and only needed for GPU memory measurement.
if is_py3nvml_available():
    import py3nvml.py3nvml as nvml


logger = logging.getLogger(__name__)
54
class PyTorchBenchmark(Benchmark):
    """Benchmark PyTorch models on inference and training.

    For every (model, batch_size, sequence_length) combination the
    ``Benchmark`` driver invokes the ``_inference_*`` / ``_train_*`` hooks
    below.  Each hook builds a model plus a random input batch and measures
    either speed (via ``timeit``) or peak memory (via nvml on GPU, process
    polling on CPU, or line-by-line tracing when requested).
    """

    # NOTE(review): these class-level annotations are presumably populated by
    # the `Benchmark` base-class constructor — confirm against benchmark_utils.
    args: PyTorchBenchmarkArguments
    configs: PretrainedConfig
    framework: str = "PyTorch"

    @property
    def framework_version(self):
        """Version string of the installed torch package."""
        return torch.__version__

    def _inference_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        """Time one no-grad forward pass; seconds per run, or "N/A" on OOM."""
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_inference)

    def _inference_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        """Measure peak memory of one no-grad forward pass."""
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_inference)

    def _train_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        """Time one forward+backward pass; seconds per run, or "N/A" on OOM."""
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_train)

    def _train_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        """Measure peak memory of one forward+backward pass."""
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_train)

    def _load_model(self, config, default_mapping):
        """Instantiate the model to benchmark for ``config``.

        Prefers the concrete class named in ``config.architectures`` so the
        exact pretrained architecture is benchmarked; falls back to the auto
        mapping ``default_mapping`` when no architecture is recorded or when
        ``args.only_pretrain_model`` is set.

        Raises:
            ImportError: if the named architecture class cannot be imported.
        """
        has_model_class_in_config = (
            hasattr(config, "architectures")
            and isinstance(config.architectures, list)
            and len(config.architectures) > 0
        )
        if not self.args.only_pretrain_model and has_model_class_in_config:
            try:
                model_class = config.architectures[0]
                transformers_module = __import__("transformers", fromlist=[model_class])
                model_cls = getattr(transformers_module, model_class)
                return model_cls(config)
            except ImportError:
                raise ImportError(
                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
                )
        return default_mapping[config.__class__](config)

    def _random_input_ids(self, config, batch_size: int, sequence_length: int):
        """Build a random token-id batch of shape (batch, seq) on the device."""
        # encoder-decoder has vocab size saved differently
        vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
        return torch.randint(
            vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
        )

    def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        """Return a zero-argument callable that runs one no-grad forward pass."""
        config = self.config_dict[model_name]

        # must be set before the model is constructed for tracing to work
        if self.args.torchscript:
            config.torchscript = True

        model = self._load_model(config, MODEL_MAPPING)
        model.eval()
        model.to(self.args.device)

        input_ids = self._random_input_ids(config, batch_size, sequence_length)

        if self.args.fp16:
            # fixed: original message incorrectly said "training" here
            logger.info("Running inference in Mixed Precision...")
            assert self.args.is_gpu, "Mixed precision is possible only for GPU."
            # amp seems to have memory leaks so that memory usage
            # is measured using .half() for now https://github.com/NVIDIA/apex/issues/439
            model.half()

        if self.args.torchscript:
            with torch.no_grad():
                inference_model = torch.jit.trace(model, input_ids)
        else:
            inference_model = model

        def encoder_decoder_forward():
            with torch.no_grad():
                outputs = inference_model(input_ids, decoder_input_ids=input_ids)
            return outputs

        def encoder_forward():
            with torch.no_grad():
                outputs = inference_model(input_ids)
            return outputs

        _forward = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward
        return _forward

    def _prepare_train_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        """Return a zero-argument callable that runs one forward+backward pass."""
        config = self.config_dict[model_name]

        model = self._load_model(config, MODEL_WITH_LM_HEAD_MAPPING)

        if self.args.torchscript:
            raise NotImplementedError("Training for torchscript is currently not implemented")
        else:
            train_model = model

        model.train()
        model.to(self.args.device)

        input_ids = self._random_input_ids(config, batch_size, sequence_length)

        if self.args.fp16:
            logger.info("Running training in Mixed Precision...")
            assert self.args.is_gpu, "Mixed precision is possible only for GPU."

            # amp seems to have memory leaks so that memory usage
            # is measured using .half() for now https://github.com/NVIDIA/apex/issues/439
            model.half()

        def compute_loss_and_backprop_encoder():
            # models return loss as the first tuple element when `labels` is given
            loss = train_model(input_ids, labels=input_ids)[0]
            loss.backward()
            return loss

        def compute_loss_and_backprop_encoder_decoder():
            loss = train_model(input_ids, decoder_input_ids=input_ids, labels=input_ids)[0]
            loss.backward()
            return loss

        _train = (
            compute_loss_and_backprop_encoder_decoder
            if config.is_encoder_decoder
            else compute_loss_and_backprop_encoder
        )
        return _train

    def _measure_speed(self, func) -> float:
        """Return the best per-call runtime of ``func`` in seconds.

        Returns the string "N/A" instead when the model does not fit on the
        GPU (a ``RuntimeError`` is raised by torch in that case).
        """
        try:
            if self.args.is_tpu or self.args.torchscript:
                # run additional 10 times to stabilize compilation for tpu and torchscript
                logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
                timeit.repeat(
                    func, repeat=1, number=5,
                )

            # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)

            if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
                import torch_xla.debug.metrics as met

                self.print_fn(met.metrics_report())

            # each repeat ran `func` 10 times, so divide to get per-call time
            return min(runtimes) / 10.0
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A"

    def _measure_memory(self, func: Callable[[], None]) -> Tuple[Memory, Optional[MemorySummary]]:
        """Run ``func`` once and return (peak memory, optional line-by-line summary).

        GPU: total device memory in use after the call, read via nvml (so
        concurrent processes on the same GPU inflate the number — see the
        logged warning).  CPU: peak RSS of the process.  TPU: unsupported.
        Returns ("N/A", None) when the model does not fit on the GPU.
        """
        try:
            if self.args.trace_memory_line_by_line:
                trace = start_memory_tracing("transformers")

            if self.args.is_tpu:
                # tpu
                raise NotImplementedError(
                    "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `--no_memory` or `args.no_memory=True`"
                )
            elif self.args.is_gpu:
                if not is_py3nvml_available():
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    memory = "N/A"
                else:
                    logger.info(
                        "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU."
                    )
                    # init nvml
                    nvml.nvmlInit()
                    func()
                    handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                    meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                    max_bytes_in_use = meminfo.used
                    memory = Memory(max_bytes_in_use)
                    # shutdown nvml
                    nvml.nvmlShutdown()
            else:
                # cpu
                memory_bytes = measure_peak_memory_cpu(func)
                memory = Memory(memory_bytes) if isinstance(memory_bytes, int) else memory_bytes

            if self.args.trace_memory_line_by_line:
                summary = stop_memory_tracing(trace)
            else:
                summary = None

            return memory, summary
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A", None