CSS-LM
871 строка · 36.2 Кб
1"""
2Utilities for working with the local dataset cache.
3This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
4Copyright by the AllenNLP authors.
5"""
6
7import copy8import csv9import linecache10import logging11import os12import platform13import sys14from abc import ABC, abstractmethod15from collections import defaultdict, namedtuple16from datetime import datetime17from multiprocessing import Pipe, Process, Queue18from multiprocessing.connection import Connection19from typing import Callable, Iterable, List, NamedTuple, Optional, Union20
21from transformers import AutoConfig, PretrainedConfig22from transformers import __version__ as version23
24from ..file_utils import is_psutil_available, is_py3nvml_available, is_tf_available, is_torch_available25from .benchmark_args_utils import BenchmarkArguments26
27
28if is_torch_available():29from torch.cuda import empty_cache as torch_empty_cache30
31if is_tf_available():32from tensorflow.python.eager import context as tf_context33
34if is_psutil_available():35import psutil36
37if is_py3nvml_available():38import py3nvml.py3nvml as nvml39
40if platform.system() == "Windows":41from signal import CTRL_C_EVENT as SIGKILL42else:43from signal import SIGKILL44
45
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


# Global switch flipped on by `start_memory_tracing` and off by `stop_memory_tracing`;
# read by the trace function installed via `sys.settrace` to decide whether to record.
_is_memory_tracing_enabled = False

# Aggregate result returned by `Benchmark.run`: per-model timing/memory result dicts
# plus the line-by-line `MemorySummary` objects (or None) for inference and training.
BenchmarkOutput = namedtuple(
    "BenchmarkOutput",
    [
        "time_inference_result",
        "memory_inference_result",
        "time_train_result",
        "memory_train_result",
        "inference_summary",
        "train_summary",
    ],
)
62
63
def separate_process_wrapper_fn(func: Callable[[], None], do_multi_processing: bool) -> Callable[[], None]:
    """
    Optionally wrap `func` so that it runs in its own separate process.

    Running in a fresh process is important for accurate memory measurements,
    since the child starts from a clean memory state.

    Args:
        - `func`: (`callable`): function() -> ...
            generic function which will be executed in its own separate process
        - `do_multi_processing`: (`bool`)
            Whether to run function on separate process or not

    Returns:
        `func` unchanged when `do_multi_processing` is False, otherwise a wrapper
        that executes `func` in a child process and returns its result (or "N/A"
        if the child raised).
    """

    def multi_process_func(*args, **kwargs):
        # run function in an individual
        # process to get correct memory
        def wrapper_func(queue: Queue, *args):
            try:
                result = func(*args)
            except Exception as e:
                logger.error(e)
                print(e)
                # sentinel result so the parent's queue.get() never blocks forever
                result = "N/A"
            queue.put(result)

        queue = Queue()
        p = Process(target=wrapper_func, args=[queue] + list(args))
        p.start()
        # Fetch the result BEFORE joining: avoids a deadlock if the child's
        # queue buffer is full while the parent waits on join().
        result = queue.get()
        p.join()
        return result

    if do_multi_processing:
        # BUGFIX: the original wrote "fFunction {func}..." — the `f` prefix was
        # inside the string literal, so `{func}` was never interpolated.
        logging.info(f"Function {func} is executed in its own process...")
        return multi_process_func
    else:
        return func
102
def is_memory_tracing_enabled():
    """Return True while line-by-line memory tracing is active."""
    # Reading a module-level name needs no `global` declaration.
    return _is_memory_tracing_enabled
107
class Frame(NamedTuple):
    """`Frame` is a NamedTuple used to gather the current frame state.

    `Frame` has the following fields:
        - 'filename' (string): Name of the file currently executed
        - 'module' (string): Name of the module currently executed
        - 'line_number' (int): Number of the line currently executed
        - 'event' (string): Event that triggered the tracing (default will be "line")
        - 'line_text' (string): Text of the line in the python script
    """

    filename: str
    module: str
    line_number: int
    event: str
    line_text: str
124
class UsedMemoryState(NamedTuple):
    """`UsedMemoryState` are named tuples with the following fields:

    - 'frame': a `Frame` namedtuple storing information on the current tracing frame
      (current file, location in current file)
    - 'cpu_memory': CPU RSS memory state *before* executing the line
    - 'gpu_memory': GPU used memory *before* executing the line
      (sum for all GPUs or for only `gpus_to_trace` if provided)
    """

    frame: Frame
    cpu_memory: int
    gpu_memory: int
136
class Memory(NamedTuple):
    """A raw byte count whose `repr` renders it as whole megabytes.

    Fields:
        - `bytes` (integer): number of bytes
    """

    bytes: int

    def __repr__(self) -> str:
        # Same conversion as `bytes_to_mega_bytes`: floor-divide by 2**20 via a shift.
        return str(self.bytes >> 20)
148
class MemoryState(NamedTuple):
    """`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:

    - `frame` (`Frame`): the current frame (see above)
    - `cpu`: CPU memory consumed during the current frame as a `Memory` named tuple
    - `gpu`: GPU memory consumed during the current frame as a `Memory` named tuple
    - `cpu_gpu`: CPU + GPU memory consumed during the current frame as a `Memory` named tuple
    """

    frame: Frame
    cpu: Memory
    gpu: Memory
    cpu_gpu: Memory
162
class MemorySummary(NamedTuple):
    """`MemorySummary` namedtuple with the fields:

    - `sequential`: a list of `MemoryState` namedtuples computed from the provided `memory_trace`
      by subtracting the memory after executing each line from the memory before executing said line.
    - `cumulative`: a list of `MemoryState` namedtuples with cumulative increase in memory for each line,
      obtained by summing repeated memory increases for a line if it's executed several times.
      The list is sorted from the frame with the largest memory consumption to the frame with the
      smallest (can be negative if memory is released).
    - `current`: a list of `MemoryState` namedtuples holding the *absolute* memory observed after each
      line, sorted from largest to smallest CPU+GPU usage.
    - `total`: total memory increase during the full tracing as a `Memory` named tuple.
      Lines with memory release (negative consumption) are ignored if `ignore_released_memory`
      is `True` (default).
    """

    sequential: List[MemoryState]
    cumulative: List[MemoryState]
    current: List[MemoryState]
    total: Memory
179
# A full memory trace: one `UsedMemoryState` per traced event (by default, per executed line).
MemoryTrace = List[UsedMemoryState]
182
def measure_peak_memory_cpu(function: Callable[[], None], interval=0.5, device_idx=None) -> int:
    """
    Measures peak CPU memory consumption of a given `function`,
    running the function for at least `interval` seconds
    and at most 20 * `interval` seconds.
    This function is heavily inspired by: `memory_usage`
    of the package `memory_profiler`: https://github.com/pythonprofilers/memory_profiler/blob/895c4ac7a08020d66ae001e24067da6dcea42451/memory_profiler.py#L239

    Args:
        - `function`: (`callable`): function() -> ...
            function without any arguments for which to measure the peak memory

        - `interval`: (`float`, `optional`, defaults to `0.5`)
            interval in seconds at which to sample memory usage

        - `device_idx`: (`int`, `optional`, defaults to `None`)
            device id for which to measure gpu usage
            (NOTE(review): currently unused in this implementation — confirm against callers)

    Returns:
        - `max_memory`: (`int`)
            consumed memory peak in Bytes (the string "N/A" if psutil is unavailable)
    """

    def get_cpu_memory(process_id: int) -> int:
        """
        Measures current cpu memory usage (RSS, in bytes) of a given `process_id`.

        Args:
            - `process_id`: (`int`)
                process_id for which to measure memory

        Returns:
            - `memory`: (`int`)
                consumed memory in Bytes
        """
        process = psutil.Process(process_id)
        try:
            # Older psutil versions expose `get_memory_info` instead of `memory_info`.
            meminfo_attr = "memory_info" if hasattr(process, "memory_info") else "get_memory_info"
            # Index 0 of the memory-info tuple is the RSS value.
            memory = getattr(process, meminfo_attr)()[0]
        except psutil.AccessDenied:
            raise ValueError("Error with Psutil.")
        return memory

    if not is_psutil_available():
        logger.warning(
            "Psutil not installed, we won't log CPU memory usage. "
            "Install Psutil (pip install psutil) to use CPU memory tracing."
        )
        max_memory = "N/A"
    else:

        class MemoryMeasureProcess(Process):

            """
            `MemoryMeasureProcess` inherits from `Process` and overwrites
            its `run()` method. Used to measure the memory usage of a process
            """

            def __init__(self, process_id: int, child_connection: Connection, interval: float):
                super().__init__()
                self.process_id = process_id
                self.interval = interval
                self.connection = child_connection
                self.num_measurements = 1
                # Take a first sample immediately so mem_usage is never empty.
                self.mem_usage = get_cpu_memory(self.process_id)

            def run(self):
                # Signal the parent that sampling has started.
                self.connection.send(0)
                stop = False
                while True:
                    # Track the running maximum of the parent's RSS.
                    self.mem_usage = max(self.mem_usage, get_cpu_memory(self.process_id))
                    self.num_measurements += 1

                    if stop:
                        break

                    # poll() doubles as the sampling sleep; it returns True once
                    # the parent sends its "done" message, triggering one last sample.
                    stop = self.connection.poll(self.interval)

                # send results to parent pipe
                self.connection.send(self.mem_usage)
                self.connection.send(self.num_measurements)

        # Retry with a 10x smaller interval until we collected enough samples
        # (> 4) or the interval is already tiny (< 1e-6 s).
        while True:
            # create child, parent connection
            child_connection, parent_connection = Pipe()

            # instantiate process
            mem_process = MemoryMeasureProcess(os.getpid(), child_connection, interval)
            mem_process.start()

            # wait until we get memory
            parent_connection.recv()

            try:
                # execute function
                function()

                # start parent connection
                parent_connection.send(0)

                # receive memory and num measurements
                max_memory = parent_connection.recv()
                num_measurements = parent_connection.recv()
            except Exception:
                # kill process in a clean way
                parent = psutil.Process(os.getpid())
                for child in parent.children(recursive=True):
                    os.kill(child.pid, SIGKILL)
                mem_process.join(0)
                raise RuntimeError("Process killed. Error in Process")

            # run process at least 20 * interval or until it finishes
            mem_process.join(20 * interval)

            if (num_measurements > 4) or (interval < 1e-6):
                break

            # reduce interval
            interval /= 10

    return max_memory
305
def start_memory_tracing(
    modules_to_trace: Optional[Union[str, Iterable[str]]] = None,
    modules_not_to_trace: Optional[Union[str, Iterable[str]]] = None,
    events_to_trace: str = "line",
    gpus_to_trace: Optional[List[int]] = None,
) -> MemoryTrace:
    """Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
    See `./benchmark.py` for usage examples.
    Current memory consumption is returned using psutil and in particular is the RSS memory
    "Resident Set Size" (the non-swapped physical memory the process is using).
    See https://psutil.readthedocs.io/en/latest/#psutil.Process.memory_info

    Args:
        - `modules_to_trace`: (None, string, list/tuple of string)
            if None, all events are recorded
            if string or list of strings: only events from the listed module/sub-module will be
            recorded (e.g. 'fairseq' or 'transformers.modeling_gpt2')
        - `modules_not_to_trace`: (None, string, list/tuple of string)
            if None, no module is avoided
            if string or list of strings: events from the listed module/sub-module will not be
            recorded (e.g. 'torch')
        - `events_to_trace`: string or list of string of events to be recorded
            (see official python doc for `sys.settrace` for the list of events), default to line
        - `gpus_to_trace`: (optional list, default None) list of GPUs to trace.
            Default to tracing all GPUs

    Return:
        - `memory_trace` is a list of `UsedMemoryState` for each event
          (default each line of the traced script); the list is returned immediately and
          filled in-place while tracing runs, until `stop_memory_tracing` is called.
    """
    if is_psutil_available():
        process = psutil.Process(os.getpid())
    else:
        logger.warning(
            "Psutil not installed, we won't log CPU memory usage. "
            "Install psutil (pip install psutil) to use CPU memory tracing."
        )
        process = None

    if is_py3nvml_available():
        try:
            # Init/shutdown here only to validate GPU access and enumerate devices;
            # nvml is re-initialized on every traced line below.
            nvml.nvmlInit()
            devices = list(range(nvml.nvmlDeviceGetCount())) if gpus_to_trace is None else gpus_to_trace
            nvml.nvmlShutdown()
        except (OSError, nvml.NVMLError):
            logger.warning("Error while initializing comunication with GPU. " "We won't perform GPU memory tracing.")
            log_gpu = False
        else:
            log_gpu = is_torch_available() or is_tf_available()
    else:
        logger.warning(
            "py3nvml not installed, we won't log GPU memory usage. "
            "Install py3nvml (pip install py3nvml) to use GPU memory tracing."
        )
        log_gpu = False

    memory_trace = []

    def traceit(frame, event, args):
        """Tracing method executed before running each line in a module or sub-module.
        Records memory allocated in a list with debugging information.
        Always returns itself so tracing continues into nested scopes.
        """
        global _is_memory_tracing_enabled

        if not _is_memory_tracing_enabled:
            return traceit

        # Filter events
        if events_to_trace is not None:
            if isinstance(events_to_trace, str) and event != events_to_trace:
                return traceit
            elif isinstance(events_to_trace, (list, tuple)) and event not in events_to_trace:
                return traceit

        if "__name__" not in frame.f_globals:
            return traceit

        # Filter modules
        name = frame.f_globals["__name__"]
        if not isinstance(name, str):
            return traceit
        else:
            # Filter whitelist of modules to trace (substring match on module name)
            if modules_to_trace is not None:
                if isinstance(modules_to_trace, str) and modules_to_trace not in name:
                    return traceit
                elif isinstance(modules_to_trace, (list, tuple)) and all(m not in name for m in modules_to_trace):
                    return traceit

            # Filter blacklist of modules not to trace
            if modules_not_to_trace is not None:
                if isinstance(modules_not_to_trace, str) and modules_not_to_trace in name:
                    return traceit
                elif isinstance(modules_not_to_trace, (list, tuple)) and any(m in name for m in modules_not_to_trace):
                    return traceit

        # Record current tracing state (file, location in file...)
        lineno = frame.f_lineno
        filename = frame.f_globals["__file__"]
        # Map compiled .pyc/.pyo files back to their .py source for linecache.
        if filename.endswith(".pyc") or filename.endswith(".pyo"):
            filename = filename[:-1]
        line = linecache.getline(filename, lineno).rstrip()
        traced_state = Frame(filename, name, lineno, event, line)

        # Record current memory state (rss memory) and compute difference with previous memory state
        cpu_mem = 0
        if process is not None:
            mem = process.memory_info()
            cpu_mem = mem.rss

        gpu_mem = 0
        if log_gpu:
            # Clear GPU caches so cached-but-free memory is not counted as used
            if is_torch_available():
                torch_empty_cache()
            if is_tf_available():
                tf_context.context()._clear_caches()  # See https://github.com/tensorflow/tensorflow/issues/20218#issuecomment-416771802

            # Sum used memory for all GPUs
            nvml.nvmlInit()

            for i in devices:
                handle = nvml.nvmlDeviceGetHandleByIndex(i)
                meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                gpu_mem += meminfo.used

            nvml.nvmlShutdown()

        mem_state = UsedMemoryState(traced_state, cpu_mem, gpu_mem)
        memory_trace.append(mem_state)

        return traceit

    sys.settrace(traceit)

    global _is_memory_tracing_enabled
    _is_memory_tracing_enabled = True

    return memory_trace
455
def stop_memory_tracing(
    memory_trace: Optional[MemoryTrace] = None, ignore_released_memory: bool = True
) -> Optional[MemorySummary]:
    """Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.

    Args:
        - `memory_trace` (optional output of start_memory_tracing, default: None):
            memory trace to convert in summary
        - `ignore_released_memory` (boolean, default: True):
            if True we only sum memory increases to compute total memory

    Return:
        - None if `memory_trace` is None or holds fewer than two samples
        - `MemorySummary` namedtuple otherwise, with `sequential` (per-line deltas),
          `cumulative` (per-frame summed deltas, sorted descending by CPU+GPU increase),
          `current` (absolute memory after each line, sorted descending) and
          `total` (overall increase as a `Memory`).
    """
    global _is_memory_tracing_enabled
    _is_memory_tracing_enabled = False

    # Need at least two samples to compute a delta.
    if memory_trace is not None and len(memory_trace) > 1:
        memory_diff_trace = []
        memory_curr_trace = []

        # Per-frame accumulator: [cpu_inc, gpu_inc, cpu_gpu_inc]
        cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])

        # Pairwise walk: the delta for a line is (memory before next line) - (memory before it).
        for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip(
            memory_trace[:-1], memory_trace[1:]
        ):
            cpu_mem_inc = next_cpu_mem - cpu_mem
            gpu_mem_inc = next_gpu_mem - gpu_mem
            cpu_gpu_mem_inc = cpu_mem_inc + gpu_mem_inc
            memory_diff_trace.append(
                MemoryState(
                    frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
                )
            )

            # Absolute memory observed after executing this line.
            memory_curr_trace.append(
                MemoryState(
                    frame=frame,
                    cpu=Memory(next_cpu_mem),
                    gpu=Memory(next_gpu_mem),
                    cpu_gpu=Memory(next_gpu_mem + next_cpu_mem),
                )
            )

            # A frame executed several times accumulates all its increases
            # (Frame is a NamedTuple, hence hashable and usable as dict key).
            cumulative_memory_dict[frame][0] += cpu_mem_inc
            cumulative_memory_dict[frame][1] += gpu_mem_inc
            cumulative_memory_dict[frame][2] += cpu_gpu_mem_inc

        cumulative_memory = sorted(
            list(cumulative_memory_dict.items()), key=lambda x: x[1][2], reverse=True
        )  # order by the total CPU + GPU memory increase
        cumulative_memory = list(
            MemoryState(
                frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
            )
            for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
        )

        memory_curr_trace = sorted(memory_curr_trace, key=lambda x: x.cpu_gpu.bytes, reverse=True)

        if ignore_released_memory:
            # Clamp negative deltas (released memory) to zero before summing.
            total_memory = sum(max(0, step_trace.cpu_gpu.bytes) for step_trace in memory_diff_trace)
        else:
            total_memory = sum(step_trace.cpu_gpu.bytes for step_trace in memory_diff_trace)

        total_memory = Memory(total_memory)

        return MemorySummary(
            sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory,
        )

    return None
552
def bytes_to_mega_bytes(memory_amount: int) -> int:
    """Convert a byte count (int) into whole mebibytes (int), flooring."""
    # Floor division by 2**20 — equivalent to the arithmetic right shift `>> 20`.
    return memory_amount // (1 << 20)
558
class Benchmark(ABC):
    """
    Benchmarks is a simple but feature-complete benchmarking script
    to compare memory and time performance of models in Transformers.

    Subclasses implement the framework-specific `_inference_*` / `_train_*`
    hooks; `run()` drives the full benchmark over all configured models,
    batch sizes and sequence lengths, printing and optionally saving results.
    """

    args: BenchmarkArguments
    configs: PretrainedConfig
    framework: str

    def __init__(self, args: BenchmarkArguments = None, configs: PretrainedConfig = None):
        """Store the benchmark arguments and build a per-model config dict.

        If `configs` is None, configs are downloaded via `AutoConfig.from_pretrained`;
        otherwise `configs` is zipped against `args.model_names`.
        """
        self.args = args
        if configs is None:
            self.config_dict = {
                model_name: AutoConfig.from_pretrained(model_name) for model_name in self.args.model_names
            }
        else:
            self.config_dict = {model_name: config for model_name, config in zip(self.args.model_names, configs)}

        # BUGFIX: os.getenv returns a string (or None), so the original
        # comparison `== 0` (int) was always False and the warning never fired.
        if not self.args.no_memory and os.getenv("TRANSFORMERS_USE_MULTIPROCESSING") == "0":
            logger.warning(
                "Memory consumption will not be measured accurately if `args.no_multi_process` is set to `True.` The flag 'TRANSFORMERS_USE_MULTIPROCESSING' should only be disabled for debugging / testing."
            )

        # Lazily-initialized caches (see `print_fn` / `environment_info` properties).
        self._print_fn = None
        self._framework_version = None
        self._environment_info = None

    @property
    def print_fn(self):
        """Printing function: plain `print`, or print + append to `args.log_filename`
        when `args.log_print` is set. Built once and cached."""
        if self._print_fn is None:
            if self.args.log_print:

                def print_and_log(*args):
                    with open(self.args.log_filename, "a") as log_file:
                        log_file.write("".join(args) + "\n")
                    print(*args)

                self._print_fn = print_and_log
            else:
                self._print_fn = print
        return self._print_fn

    @property
    @abstractmethod
    def framework_version(self):
        """Version string of the underlying framework (implemented by subclasses)."""
        pass

    @abstractmethod
    def _inference_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        """Measure inference time in seconds for one (model, bs, seq_len) combination."""
        pass

    @abstractmethod
    def _train_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        """Measure training time in seconds for one (model, bs, seq_len) combination."""
        pass

    @abstractmethod
    def _inference_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> [Memory, Optional[MemorySummary]]:
        """Measure inference memory; returns (peak `Memory`, optional line-by-line summary)."""
        pass

    @abstractmethod
    def _train_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> [Memory, Optional[MemorySummary]]:
        """Measure training memory; returns (peak `Memory`, optional line-by-line summary)."""
        pass

    # Public wrappers: run the abstract hooks in a separate process when
    # `args.do_multi_processing` is set, for accurate memory measurement.
    def inference_speed(self, *args, **kwargs) -> float:
        return separate_process_wrapper_fn(self._inference_speed, self.args.do_multi_processing)(*args, **kwargs)

    def train_speed(self, *args, **kwargs) -> float:
        return separate_process_wrapper_fn(self._train_speed, self.args.do_multi_processing)(*args, **kwargs)

    def inference_memory(self, *args, **kwargs) -> [Memory, Optional[MemorySummary]]:
        return separate_process_wrapper_fn(self._inference_memory, self.args.do_multi_processing)(*args, **kwargs)

    def train_memory(self, *args, **kwargs) -> [Memory, Optional[MemorySummary]]:
        return separate_process_wrapper_fn(self._train_memory, self.args.do_multi_processing)(*args, **kwargs)

    def run(self):
        """Run the configured benchmarks for every model / batch size / sequence length,
        print (and optionally save) the results, and return a `BenchmarkOutput`."""
        result_dict = {model_name: {} for model_name in self.args.model_names}
        inference_result_time = copy.deepcopy(result_dict)
        inference_result_memory = copy.deepcopy(result_dict)
        train_result_time = copy.deepcopy(result_dict)
        train_result_memory = copy.deepcopy(result_dict)

        for c, model_name in enumerate(self.args.model_names):
            self.print_fn(f"{c + 1} / {len(self.args.model_names)}")

            model_dict = {
                "bs": self.args.batch_sizes,
                "ss": self.args.sequence_lengths,
                "result": {i: {} for i in self.args.batch_sizes},
            }
            inference_result_time[model_name] = copy.deepcopy(model_dict)
            inference_result_memory[model_name] = copy.deepcopy(model_dict)
            train_result_time[model_name] = copy.deepcopy(model_dict)
            train_result_memory[model_name] = copy.deepcopy(model_dict)

            # NOTE: summaries keep only the values from the last (model, bs, seq_len)
            # combination; they are what gets printed in the line-by-line sections below.
            inference_summary = train_summary = None

            for batch_size in self.args.batch_sizes:
                for sequence_length in self.args.sequence_lengths:
                    if not self.args.no_inference:
                        if not self.args.no_memory:
                            memory, inference_summary = self.inference_memory(model_name, batch_size, sequence_length)
                            inference_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                        if not self.args.no_speed:
                            time = self.inference_speed(model_name, batch_size, sequence_length)
                            inference_result_time[model_name]["result"][batch_size][sequence_length] = time

                    if self.args.training:
                        if not self.args.no_memory:
                            memory, train_summary = self.train_memory(model_name, batch_size, sequence_length)
                            train_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                        if not self.args.no_speed:
                            time = self.train_speed(model_name, batch_size, sequence_length)
                            train_result_time[model_name]["result"][batch_size][sequence_length] = time

        if not self.args.no_inference:
            if not self.args.no_speed:
                self.print_fn("\n" + 20 * "=" + ("INFERENCE - SPEED - RESULT").center(40) + 20 * "=")
                self.print_results(inference_result_time, type_label="Time in s")
                self.save_to_csv(inference_result_time, self.args.inference_time_csv_file)
                if self.args.is_tpu:
                    self.print_fn(
                        "TPU was used for inference. Note that the time after compilation stabilized (after ~10 inferences model.forward(..) calls) was measured."
                    )

            if not self.args.no_memory:
                self.print_fn("\n" + 20 * "=" + ("INFERENCE - MEMORY - RESULT").center(40) + 20 * "=")
                self.print_results(inference_result_memory, type_label="Memory in MB")
                self.save_to_csv(inference_result_memory, self.args.inference_memory_csv_file)

            if self.args.trace_memory_line_by_line:
                # BUGFIX: header typo "MEMOMRY" -> "MEMORY"
                self.print_fn("\n" + 20 * "=" + ("INFERENCE - MEMORY - LINE BY LINE - SUMMARY").center(40) + 20 * "=")
                self.print_memory_trace_statistics(inference_summary)

        if self.args.training:
            if not self.args.no_speed:
                self.print_fn("\n" + 20 * "=" + ("TRAIN - SPEED - RESULTS").center(40) + 20 * "=")
                self.print_results(train_result_time, "Time in s")
                self.save_to_csv(train_result_time, self.args.train_time_csv_file)
                if self.args.is_tpu:
                    self.print_fn(
                        "TPU was used for training. Note that the time after compilation stabilized (after ~10 train loss=model.forward(...) + loss.backward() calls) was measured."
                    )

            if not self.args.no_memory:
                self.print_fn("\n" + 20 * "=" + ("TRAIN - MEMORY - RESULTS").center(40) + 20 * "=")
                self.print_results(train_result_memory, type_label="Memory in MB")
                self.save_to_csv(train_result_memory, self.args.train_memory_csv_file)

            if self.args.trace_memory_line_by_line:
                # BUGFIX: header typo "MEMOMRY" -> "MEMORY"
                self.print_fn("\n" + 20 * "=" + ("TRAIN - MEMORY - LINE BY LINE - SUMMARY").center(40) + 20 * "=")
                self.print_memory_trace_statistics(train_summary)

        if not self.args.no_env_print:
            self.print_fn("\n" + 20 * "=" + ("ENVIRONMENT INFORMATION").center(40) + 20 * "=")
            self.print_fn(
                "\n".join(["- {}: {}".format(prop, val) for prop, val in self.environment_info.items()]) + "\n"
            )

        if self.args.save_to_csv:
            with open(self.args.env_info_csv_file, mode="w", newline="") as csv_file:
                writer = csv.writer(csv_file)
                for key, value in self.environment_info.items():
                    writer.writerow([key, value])

        return BenchmarkOutput(
            inference_result_time,
            inference_result_memory,
            train_result_time,
            train_result_memory,
            inference_summary,
            train_summary,
        )

    @property
    def environment_info(self):
        """Dict describing the execution environment (framework, OS, CPU/GPU/TPU, RAM...).
        Built once on first access and cached."""
        if self._environment_info is None:
            info = {}
            info["transformers_version"] = version
            info["framework"] = self.framework
            if self.framework == "PyTorch":
                info["use_torchscript"] = self.args.torchscript
            if self.framework == "TensorFlow":
                info["eager_mode"] = self.args.eager_mode
                info["use_xla"] = self.args.use_xla
            info["framework_version"] = self.framework_version
            info["python_version"] = platform.python_version()
            info["system"] = platform.system()
            info["cpu"] = platform.processor()
            info["architecture"] = platform.architecture()[0]
            info["date"] = datetime.date(datetime.now())
            info["time"] = datetime.time(datetime.now())
            info["fp16"] = self.args.fp16
            info["use_multiprocessing"] = self.args.do_multi_processing
            info["only_pretrain_model"] = self.args.only_pretrain_model

            if is_psutil_available():
                info["cpu_ram_mb"] = bytes_to_mega_bytes(psutil.virtual_memory().total)
            else:
                logger.warning(
                    "Psutil not installed, we won't log available CPU memory."
                    "Install psutil (pip install psutil) to log available CPU memory."
                )
                info["cpu_ram_mb"] = "N/A"

            info["use_gpu"] = self.args.is_gpu
            if self.args.is_gpu:
                info["num_gpus"] = 1  # TODO(PVP) Currently only single GPU is supported
                if is_py3nvml_available():
                    nvml.nvmlInit()
                    handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                    info["gpu"] = nvml.nvmlDeviceGetName(handle)
                    info["gpu_ram_mb"] = bytes_to_mega_bytes(nvml.nvmlDeviceGetMemoryInfo(handle).total)
                    info["gpu_power_watts"] = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000
                    info["gpu_performance_state"] = nvml.nvmlDeviceGetPerformanceState(handle)
                    nvml.nvmlShutdown()
                else:
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    info["gpu"] = "N/A"
                    info["gpu_ram_mb"] = "N/A"
                    info["gpu_power_watts"] = "N/A"
                    info["gpu_performance_state"] = "N/A"

            info["use_tpu"] = self.args.is_tpu
            # TODO(PVP): See if we can add more information about TPU
            # see: https://github.com/pytorch/xla/issues/2180

            self._environment_info = info
        return self._environment_info

    def print_results(self, result_dict, type_label):
        """Pretty-print a result dict as a fixed-width table via `print_fn`."""
        self.print_fn(80 * "-")
        self.print_fn(
            "Model Name".center(30) + "Batch Size".center(15) + "Seq Length".center(15) + type_label.center(15)
        )
        self.print_fn(80 * "-")
        for model_name in self.args.model_names:
            for batch_size in result_dict[model_name]["bs"]:
                for sequence_length in result_dict[model_name]["ss"]:
                    result = result_dict[model_name]["result"][batch_size][sequence_length]
                    if isinstance(result, float):
                        # Round to 3 decimals; show exact zeros as "< 0.001".
                        result = round(1000 * result) / 1000
                        result = "< 0.001" if result == 0.0 else str(result)
                    else:
                        result = str(result)
                    self.print_fn(
                        model_name[:30].center(30) + str(batch_size).center(15),
                        str(sequence_length).center(15),
                        result.center(15),
                    )
        self.print_fn(80 * "-")

    def print_memory_trace_statistics(self, summary: MemorySummary):
        """Print per-line, top/bottom cumulative, and total memory statistics of a `MemorySummary`."""
        self.print_fn(
            "\nLine by line memory consumption:\n"
            + "\n".join(
                f"{state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.sequential
            )
        )
        self.print_fn(
            "\nLines with top memory consumption:\n"
            + "\n".join(
                f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.cumulative[:6]
            )
        )
        self.print_fn(
            "\nLines with lowest memory consumption:\n"
            + "\n".join(
                f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.cumulative[-6:]
            )
        )
        self.print_fn(f"\nTotal memory increase: {summary.total}")

    def save_to_csv(self, result_dict, filename):
        """Write a result dict to `filename` as CSV (no-op unless `args.save_to_csv`)."""
        if not self.args.save_to_csv:
            return
        self.print_fn("Saving results to csv.")
        with open(filename, mode="w") as csv_file:

            # BUGFIX: the assert message referenced `self.model_names` (nonexistent
            # attribute) — it would have raised AttributeError instead of the assert.
            assert len(self.args.model_names) > 0, "At least 1 model should be defined, but got {}".format(
                self.args.model_names
            )

            fieldnames = ["model", "batch_size", "sequence_length"]
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames + ["result"])
            writer.writeheader()

            for model_name in self.args.model_names:
                result_dict_model = result_dict[model_name]["result"]
                for bs in result_dict_model:
                    for ss in result_dict_model[bs]:
                        result_model = result_dict_model[bs][ss]
                        writer.writerow(
                            {
                                "model": model_name,
                                "batch_size": bs,
                                "sequence_length": ss,
                                "result": ("{}" if not isinstance(result_model, float) else "{:.4f}").format(
                                    result_model
                                ),
                            }
                        )