caffe / parse_log.py
210 lines · 7.0 KB
#!/usr/bin/env python

"""
Parse training log

Evolved from parse_log.sh
"""
8

9
import os
10
import re
11
import extract_seconds
12
import argparse
13
import csv
14
from collections import OrderedDict
15

16

17
def parse_log(path_to_log):
18
    """Parse log file
19
    Returns (train_dict_list, test_dict_list)
20

21
    train_dict_list and test_dict_list are lists of dicts that define the table
22
    rows
23
    """
24

25
    regex_iteration = re.compile('Iteration (\d+)')
26
    regex_train_output = re.compile('Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
27
    regex_test_output = re.compile('Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
28
    regex_learning_rate = re.compile('lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
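
    # Illustrative Caffe log lines these patterns are meant to match (exact
    # formatting can vary between Caffe versions):
    #
    #   Iteration 100, loss = 0.2189
    #   Train net output #0: loss = 0.2189 (* 1 = 0.2189 loss)
    #   Test net output #0: accuracy = 0.9785
    #   Iteration 100, lr = 0.001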
29

30
    # Pick out lines of interest
31
    iteration = -1
32
    learning_rate = float('NaN')
33
    train_dict_list = []
34
    test_dict_list = []
35
    train_row = None
36
    test_row = None
37

38
    logfile_year = extract_seconds.get_log_created_year(path_to_log)
39
    with open(path_to_log) as f:
40
        start_time = extract_seconds.get_start_time(f, logfile_year)
41
        last_time = start_time
42

43
        for line in f:
44
            iteration_match = regex_iteration.search(line)
45
            if iteration_match:
46
                iteration = float(iteration_match.group(1))
47
            if iteration == -1:
48
                # Only start parsing for other stuff if we've found the first
49
                # iteration
50
                continue
51

52
            try:
53
                time = extract_seconds.extract_datetime_from_line(line,
54
                                                                  logfile_year)
55
            except ValueError:
56
                # Skip lines with bad formatting, for example when resuming solver
57
                continue
58

59
            # if it's another year
60
            if time.month < last_time.month:
61
                logfile_year += 1
62
                time = extract_seconds.extract_datetime_from_line(line, logfile_year)
63
            last_time = time
64

65
            seconds = (time - start_time).total_seconds()
66

67
            learning_rate_match = regex_learning_rate.search(line)
68
            if learning_rate_match:
69
                learning_rate = float(learning_rate_match.group(1))
70

71
            train_dict_list, train_row = parse_line_for_net_output(
72
                regex_train_output, train_row, train_dict_list,
73
                line, iteration, seconds, learning_rate
74
            )
75
            test_dict_list, test_row = parse_line_for_net_output(
76
                regex_test_output, test_row, test_dict_list,
77
                line, iteration, seconds, learning_rate
78
            )
79

80
    fix_initial_nan_learning_rate(train_dict_list)
81
    fix_initial_nan_learning_rate(test_dict_list)
82

83
    return train_dict_list, test_dict_list


def parse_line_for_net_output(regex_obj, row, row_dict_list,
                              line, iteration, seconds, learning_rate):
    """Parse a single line for training or test output

    Returns a tuple with (row_dict_list, row)
    row: may be either a new row or an augmented version of the current row
    row_dict_list: may be either the current row_dict_list or an augmented
    version of the current row_dict_list
    """

    output_match = regex_obj.search(line)
    if output_match:
        if not row or row['NumIters'] != iteration:
            # Push the last row and start a new one
            if row:
                # If we're on a new iteration, push the last row
                # This will probably only happen for the first row; otherwise
                # the full row checking logic below will push and clear full
                # rows
                row_dict_list.append(row)

            row = OrderedDict([
                ('NumIters', iteration),
                ('Seconds', seconds),
                ('LearningRate', learning_rate)
            ])

        # output_num is not used; may be used in the future
        # output_num = output_match.group(1)
        output_name = output_match.group(2)
        output_val = output_match.group(3)
        row[output_name] = float(output_val)

    if row and len(row_dict_list) >= 1 and len(row) == len(row_dict_list[0]):
        # The row is full, based on the fact that it has the same number of
        # columns as the first row; append it to the list
        row_dict_list.append(row)
        row = None

    return row_dict_list, row
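
# For a log whose net outputs are named, say, 'loss' and 'accuracy', a
# completed row looks roughly like this (output names come from the log
# itself; values here are illustrative):
#
#   OrderedDict([('NumIters', 100.0), ('Seconds', 15.2),
#                ('LearningRate', 0.01), ('loss', 0.2189),
#                ('accuracy', 0.9785)])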


def fix_initial_nan_learning_rate(dict_list):
    """Correct initial value of learning rate

    Learning rate is normally not printed until after the initial test and
    training step, which means the initial testing and training rows have
    LearningRate = NaN. Fix this by copying over the LearningRate from the
    second row, if it exists.
    """

    if len(dict_list) > 1:
        dict_list[0]['LearningRate'] = dict_list[1]['LearningRate']
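
    # e.g. a first row with LearningRate = NaN borrows the value from row 1
    # (values illustrative):
    #   [{'NumIters': 0, 'LearningRate': nan}, {'NumIters': 100, 'LearningRate': 0.01}]
    #   -> row 0's LearningRate becomes 0.01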
139

140

141
def save_csv_files(logfile_path, output_dir, train_dict_list, test_dict_list,
142
                   delimiter=',', verbose=False):
143
    """Save CSV files to output_dir
144

145
    If the input log file is, e.g., caffe.INFO, the names will be
146
    caffe.INFO.train and caffe.INFO.test
147
    """
148

149
    log_basename = os.path.basename(logfile_path)
150
    train_filename = os.path.join(output_dir, log_basename + '.train')
151
    write_csv(train_filename, train_dict_list, delimiter, verbose)
152

153
    test_filename = os.path.join(output_dir, log_basename + '.test')
154
    write_csv(test_filename, test_dict_list, delimiter, verbose)


def write_csv(output_filename, dict_list, delimiter, verbose=False):
    """Write a CSV file
    """

    if not dict_list:
        if verbose:
            print('Not writing %s; no lines to write' % output_filename)
        return

    with open(output_filename, 'w') as f:
        # Pass the delimiter straight to DictWriter instead of mutating the
        # shared csv.excel dialect class, which would affect other csv users
        dict_writer = csv.DictWriter(f, fieldnames=dict_list[0].keys(),
                                     delimiter=delimiter)
        dict_writer.writeheader()
        dict_writer.writerows(dict_list)
    if verbose:
        print('Wrote %s' % output_filename)
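
# With the default delimiter, the resulting .train/.test files look roughly
# like this (columns beyond the first three come from the log's output names;
# values illustrative):
#
#   NumIters,Seconds,LearningRate,loss
#   100.0,15.2,0.01,0.2189
#   200.0,30.1,0.01,0.1907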


def parse_args():
    description = ('Parse a Caffe training log into two CSV files '
                   'containing training and testing information')
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument('logfile_path',
                        help='Path to log file')

    parser.add_argument('output_dir',
                        help='Directory in which to place output CSV files')

    parser.add_argument('--verbose',
                        action='store_true',
                        help='Print some extra info (e.g., output filenames)')

    parser.add_argument('--delimiter',
                        default=',',
                        help=('Column delimiter in output files '
                              '(default: \'%(default)s\')'))

    args = parser.parse_args()
    return args
200

201

202
def main():
203
    args = parse_args()
204
    train_dict_list, test_dict_list = parse_log(args.logfile_path)
205
    save_csv_files(args.logfile_path, args.output_dir, train_dict_list,
206
                   test_dict_list, delimiter=args.delimiter, verbose=args.verbose)
207

208

209
if __name__ == '__main__':
210
    main()
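
A minimal sketch of programmatic use (assumes this module is importable as
parse_log and that extract_seconds is on the Python path; 'caffe.INFO' is an
illustrative log path):

    from parse_log import parse_log, save_csv_files

    train_rows, test_rows = parse_log('caffe.INFO')
    # Each row is an OrderedDict keyed by NumIters, Seconds, LearningRate,
    # plus whatever output names appear in the log
    save_csv_files('caffe.INFO', '.', train_rows, test_rows, verbose=True)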