# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Benchmarking the library on inference and training in TensorFlow.
"""


import logging
import random
import timeit
from functools import wraps
from typing import Callable, Optional, Tuple

from transformers import (
    TF_MODEL_MAPPING,
    TF_MODEL_WITH_LM_HEAD_MAPPING,
    PretrainedConfig,
    is_py3nvml_available,
    is_tf_available,
)

from .benchmark_utils import (
    Benchmark,
    Memory,
    MemorySummary,
    measure_peak_memory_cpu,
    start_memory_tracing,
    stop_memory_tracing,
)


if is_tf_available():
    import tensorflow as tf
    from .benchmark_args_tf import TensorFlowBenchmarkArguments
    from tensorflow.python.framework.errors_impl import ResourceExhaustedError

if is_py3nvml_available():
    import py3nvml.py3nvml as nvml

logger = logging.getLogger(__name__)


def run_with_tf_optimizations(do_eager_mode: bool, use_xla: bool):
    """Decorator factory: run the wrapped callable eagerly, or as an
    (optionally XLA-compiled) `tf.function`, depending on the benchmark args."""

    def run_func(func):
        @wraps(func)
        def run_in_eager_mode(*args, **kwargs):
            return func(*args, **kwargs)

        @wraps(func)
        @tf.function(experimental_compile=use_xla)
        def run_in_graph_mode(*args, **kwargs):
            return func(*args, **kwargs)

        if do_eager_mode is True:
            assert (
                use_xla is False
            ), "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`."
            return run_in_eager_mode
        else:
            return run_in_graph_mode

    return run_func

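# Illustrative use of the factory above (`model` and `input_ids` are
# placeholders, mirroring the benchmark closures defined further down):
#
#     @run_with_tf_optimizations(do_eager_mode=False, use_xla=True)
#     def forward():
#         return model(input_ids, training=False)
#
# `forward` then runs as an XLA-compiled `tf.function`; with
# `do_eager_mode=True` (and `use_xla=False`) it runs eagerly, unchanged.
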

def random_input_ids(batch_size: int, sequence_length: int, vocab_size: int) -> "tf.Tensor":
    rng = random.Random()
    values = [rng.randint(0, vocab_size - 1) for _ in range(batch_size * sequence_length)]
    return tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32)
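
# For example, random_input_ids(2, 4, vocab_size=100) returns an int32
# tf.Tensor of shape (2, 4) with values drawn uniformly from [0, 99].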


class TensorFlowBenchmark(Benchmark):
    """Benchmarks speed and memory of TensorFlow models, for inference and training."""

    args: TensorFlowBenchmarkArguments
    configs: PretrainedConfig
    framework: str = "TensorFlow"

    @property
    def framework_version(self):
        return tf.__version__

    def _inference_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        # initialize GPU on separate process
        strategy = self.args.strategy
        assert strategy is not None, "A device strategy has to be initialized before using TensorFlow."
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_inference)

    def _train_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        strategy = self.args.strategy
        assert strategy is not None, "A device strategy has to be initialized before using TensorFlow."
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_train)

    def _inference_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        # initialize GPU on separate process
        if self.args.is_gpu:
            tf.config.experimental.set_memory_growth(self.args.gpu_list[self.args.device_idx], True)
        strategy = self.args.strategy
        assert strategy is not None, "A device strategy has to be initialized before using TensorFlow."
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_inference)

    def _train_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        if self.args.is_gpu:
            tf.config.experimental.set_memory_growth(self.args.gpu_list[self.args.device_idx], True)
        strategy = self.args.strategy
        assert strategy is not None, "A device strategy has to be initialized before using TensorFlow."

        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_train)

    def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        config = self.config_dict[model_name]

        if self.args.fp16:
            raise NotImplementedError("Mixed precision is currently not supported.")

        has_model_class_in_config = (
            hasattr(config, "architectures")
            and isinstance(config.architectures, list)
            and len(config.architectures) > 0
        )
        if not self.args.only_pretrain_model and has_model_class_in_config:
            try:
                model_class = "TF" + config.architectures[0]  # prepend 'TF' for the TensorFlow model class
                transformers_module = __import__("transformers", fromlist=[model_class])
                model_cls = getattr(transformers_module, model_class)
                model = model_cls(config)
            # getattr raises AttributeError when the TF class is missing, so catch both
            except (ImportError, AttributeError):
                raise ImportError(
                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
                )
        else:
            model = TF_MODEL_MAPPING[config.__class__](config)

        # encoder-decoder models store the vocab size on the encoder config
        vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
        input_ids = random_input_ids(batch_size, sequence_length, vocab_size)

        @run_with_tf_optimizations(self.args.eager_mode, self.args.use_xla)
        def encoder_decoder_forward():
            return model(input_ids, decoder_input_ids=input_ids, training=False)

        @run_with_tf_optimizations(self.args.eager_mode, self.args.use_xla)
        def encoder_forward():
            return model(input_ids, training=False)

        _inference = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward

        return _inference

    def _prepare_train_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        config = self.config_dict[model_name]

        assert (
            self.args.eager_mode is False
        ), "Training cannot be done in eager mode. Please make sure that `args.eager_mode = False`."

        if self.args.fp16:
            raise NotImplementedError("Mixed precision is currently not supported.")

        has_model_class_in_config = (
            hasattr(config, "architectures")
            and isinstance(config.architectures, list)
            and len(config.architectures) > 0
        )
        if not self.args.only_pretrain_model and has_model_class_in_config:
            try:
                model_class = "TF" + config.architectures[0]  # prepend 'TF' for the TensorFlow model class
                transformers_module = __import__("transformers", fromlist=[model_class])
                model_cls = getattr(transformers_module, model_class)
                model = model_cls(config)
            # getattr raises AttributeError when the TF class is missing, so catch both
            except (ImportError, AttributeError):
                raise ImportError(
                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to set `--only_pretrain_model` or `args.only_pretrain_model=True`."
                )
        else:
            model = TF_MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)

        # encoder-decoder models store the vocab size on the encoder config
        vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
        input_ids = random_input_ids(batch_size, sequence_length, vocab_size)

        @run_with_tf_optimizations(self.args.eager_mode, self.args.use_xla)
        def encoder_decoder_train():
            loss = model(input_ids, decoder_input_ids=input_ids, labels=input_ids, training=True)[0]
            gradients = tf.gradients(loss, model.trainable_variables)
            return gradients

        @run_with_tf_optimizations(self.args.eager_mode, self.args.use_xla)
        def encoder_train():
            loss = model(input_ids, labels=input_ids, training=True)[0]
            gradients = tf.gradients(loss, model.trainable_variables)
            return gradients

        _train = encoder_decoder_train if config.is_encoder_decoder else encoder_train

        return _train

    def _measure_speed(self, func) -> float:
        with self.args.strategy.scope():
            try:
                if self.args.is_tpu or self.args.use_xla:
                    # run the model 5 extra times (untimed) to stabilize TPU/XLA compilation
                    logger.info("Running model 5 times to stabilize TPU/XLA compilation")
                    timeit.repeat(func, repeat=1, number=5)

                # as noted in https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat,
                # the min should be taken rather than the average
                runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10)

                return min(runtimes) / 10.0
            except ResourceExhaustedError as e:
                self.print_fn("Doesn't fit on GPU. {}".format(e))
                return "N/A"

    def _measure_memory(self, func: Callable[[], None]) -> Tuple[Memory, Optional[MemorySummary]]:
        logger.info(
            "Note that TensorFlow allocates more memory than "
            "it might need to speed up computation. "
            "The memory reported here corresponds to the memory "
            "reported by `nvidia-smi`, which can vary depending "
            "on total available memory on the GPU that is used."
        )
        with self.args.strategy.scope():
            try:
                if self.args.trace_memory_line_by_line:
                    assert (
                        self.args.eager_mode
                    ), "`args.eager_mode` is set to `False`. Make sure to run model in eager mode to measure memory consumption line by line."
                    trace = start_memory_tracing("transformers")

                if self.args.is_tpu:
                    # tpu
                    raise NotImplementedError(
                        "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `args.no_memory=True`"
                    )
                elif self.args.is_gpu:
                    # gpu
                    if not is_py3nvml_available():
                        logger.warning(
                            "py3nvml not installed, we won't log GPU memory usage. "
                            "Install py3nvml (pip install py3nvml) to log information about GPU."
                        )
                        memory = "N/A"
                    else:
                        logger.info(
                            "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU."
                        )
                        # init nvml
                        nvml.nvmlInit()
                        func()
                        handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                        meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                        max_bytes_in_use = meminfo.used
                        memory = Memory(max_bytes_in_use)
                        # shutdown nvml
                        nvml.nvmlShutdown()
                else:
                    # cpu
                    if self.args.trace_memory_line_by_line:
                        logger.info(
                            "When enabling line by line tracing, the max peak memory for CPU is inaccurate in TensorFlow."
                        )
                        memory = None
                    else:
                        memory_bytes = measure_peak_memory_cpu(func)
                        memory = Memory(memory_bytes) if isinstance(memory_bytes, int) else memory_bytes
                if self.args.trace_memory_line_by_line:
                    summary = stop_memory_tracing(trace)
                    if memory is None:
                        memory = summary.total
                else:
                    summary = None

                return memory, summary
            except ResourceExhaustedError as e:
                self.print_fn("Doesn't fit on GPU. {}".format(e))
                return "N/A", None

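
# A minimal usage sketch, assuming the upstream `transformers` benchmark API
# (`TensorFlowBenchmarkArguments` fields `models`, `batch_sizes`,
# `sequence_lengths`, and the `Benchmark.run()` entry point); run with
# `python -m` so the relative imports above resolve.
if __name__ == "__main__" and is_tf_available():
    benchmark_args = TensorFlowBenchmarkArguments(
        models=["bert-base-uncased"],
        batch_sizes=[8],
        sequence_lengths=[32, 128],
    )
    benchmark = TensorFlowBenchmark(benchmark_args)
    results = benchmark.run()
    print(results)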