6
Evolved from parse_log.sh
14
from collections import OrderedDict
17
def parse_log(path_to_log):
# Parse the log file at path_to_log and return two lists of row dicts, one
# for training output and one for test output.
# NOTE(review): this block looks like a lossy extraction -- the bare integers
# appear to be original line numbers, indentation is gone, and several
# statements (docstring quotes, the loop that yields `line`, the initial
# values of train_row/test_row and the row lists, closing parentheses) are
# not visible here. Restore them from the original file before running this.
19
Returns (train_dict_list, test_dict_list)
21
train_dict_list and test_dict_list are lists of dicts that define the table
25
# Patterns for the log lines of interest: the iteration counter, the
# per-output train/test values (groups: output index, output name, numeric
# value) and the number that follows 'lr = '.
# NOTE(review): these are not raw strings, so '\d', '\S' and '\.' are invalid
# string escapes that recent Python 3 releases warn about -- consider r'...'.
regex_iteration = re.compile('Iteration (\d+)')
26
regex_train_output = re.compile('Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
27
regex_test_output = re.compile('Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
28
regex_learning_rate = re.compile('lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
32
# Stays NaN until a line matching 'lr = ' supplies a value.
learning_rate = float('NaN')
38
logfile_year = extract_seconds.get_log_created_year(path_to_log)
39
with open(path_to_log) as f:
40
start_time = extract_seconds.get_start_time(f, logfile_year)
41
last_time = start_time
44
iteration_match = regex_iteration.search(line)
46
# The iteration number is kept as a float.
iteration = float(iteration_match.group(1))
53
# NOTE(review): the call below is cut off mid-argument-list in this view.
time = extract_seconds.extract_datetime_from_line(line,
60
# A month earlier than the previous timestamp's month triggers a second
# extraction with logfile_year -- presumably year-rollover handling; the
# statement that changes logfile_year is not visible here (TODO confirm).
if time.month < last_time.month:
62
time = extract_seconds.extract_datetime_from_line(line, logfile_year)
65
# Elapsed seconds between this line's timestamp and start_time.
seconds = (time - start_time).total_seconds()
67
learning_rate_match = regex_learning_rate.search(line)
68
if learning_rate_match:
69
learning_rate = float(learning_rate_match.group(1))
71
# Offer the line to the train parser and then the test parser; each call
# returns its row list and its current row.
train_dict_list, train_row = parse_line_for_net_output(
72
regex_train_output, train_row, train_dict_list,
73
line, iteration, seconds, learning_rate
75
test_dict_list, test_row = parse_line_for_net_output(
76
regex_test_output, test_row, test_dict_list,
77
line, iteration, seconds, learning_rate
80
# Overwrite the first row's LearningRate with the second row's when a second
# row exists.
fix_initial_nan_learning_rate(train_dict_list)
81
fix_initial_nan_learning_rate(test_dict_list)
83
return train_dict_list, test_dict_list
86
def parse_line_for_net_output(regex_obj, row, row_dict_list,
                              line, iteration, seconds, learning_rate):
    """Parse a single line for training or test output

    Args:
        regex_obj: compiled pattern whose group 2 is the output name and
            whose group 3 is its numeric value.
        row: the row currently being assembled, or None if there is none.
        row_dict_list: list of rows collected so far; appended to in place.
        line: one line of log text.
        iteration: value stored under 'NumIters' when a new row is started.
        seconds: value stored under 'Seconds' when a new row is started.
        learning_rate: value stored under 'LearningRate' when a new row is
            started.

    Returns a tuple with (row_dict_list, row)
    row: may be either a new row or an augmented version of the current row
    row_dict_list: may be either the current row_dict_list or an augmented
    version of the current row_dict_list
    """
    output_match = regex_obj.search(line)
    if output_match:
        if not row or row['NumIters'] != iteration:
            # A different iteration starts a new row; keep the unfinished
            # previous row (if any) rather than dropping it.
            if row:
                row_dict_list.append(row)

            row = OrderedDict([
                ('NumIters', iteration),
                ('Seconds', seconds),
                ('LearningRate', learning_rate)
            ])

        # Group 1 (the output index) is not used.
        output_name = output_match.group(2)
        output_val = output_match.group(3)
        row[output_name] = float(output_val)

    if row and len(row_dict_list) >= 1 and len(row) == len(row_dict_list[0]):
        # The row has as many columns as the first row, so it is complete:
        # store it and clear it so it is not appended again on later lines.
        row_dict_list.append(row)
        row = None

    return row_dict_list, row
128
def fix_initial_nan_learning_rate(dict_list):
    """Correct initial value of learning rate

    Learning rate is normally not printed until after the initial test and
    training step, which means the initial testing and training rows have
    LearningRate = NaN. Fix this by copying over the LearningRate from the
    second row, if it exists.

    Args:
        dict_list: list of row dicts, each with a 'LearningRate' key.
            Modified in place; nothing is returned.
    """
    # With fewer than two rows there is no second row to copy from.
    if len(dict_list) > 1:
        dict_list[0]['LearningRate'] = dict_list[1]['LearningRate']
141
def save_csv_files(logfile_path, output_dir, train_dict_list, test_dict_list,
                   delimiter=',', verbose=False):
    """Save CSV files to output_dir

    If the input log file is, e.g., caffe.INFO, the names will be
    caffe.INFO.train and caffe.INFO.test

    Args:
        logfile_path: path of the parsed log; only its basename is used to
            name the output files.
        output_dir: directory that receives the two files.
        train_dict_list: rows written to the '.train' file.
        test_dict_list: rows written to the '.test' file.
        delimiter: column delimiter passed on to write_csv.
        verbose: passed on to write_csv.
    """
    log_basename = os.path.basename(logfile_path)
    train_filename = os.path.join(output_dir, log_basename + '.train')
    write_csv(train_filename, train_dict_list, delimiter, verbose)

    test_filename = os.path.join(output_dir, log_basename + '.test')
    write_csv(test_filename, test_dict_list, delimiter, verbose)
157
def write_csv(output_filename, dict_list, delimiter, verbose=False):
    """Write a list of row dicts to a CSV file

    The header is taken from the keys of the first row. Nothing is written
    when dict_list is empty.

    Args:
        output_filename: path of the file to create or overwrite.
        dict_list: list of row dicts; every row is written with the first
            row's keys as columns.
        delimiter: column delimiter for the output.
        verbose: when true, print a message saying whether the file was
            written.
    """
    if not dict_list:
        # No first row means no header to derive, so skip the file entirely.
        if verbose:
            print('Not writing %s; no lines to write' % output_filename)
        return

    with open(output_filename, 'w') as f:
        # Pass the delimiter as a per-writer format parameter instead of
        # assigning it on a dialect object, which could leak the setting
        # into other writers that share that object.
        dict_writer = csv.DictWriter(f, fieldnames=list(dict_list[0].keys()),
                                     delimiter=delimiter)
        dict_writer.writeheader()
        dict_writer.writerows(dict_list)
    if verbose:
        # Function-call form so the module also parses under Python 3.
        print('Wrote %s' % output_filename)
179
# Build the command-line parser: two positional arguments (the log file and
# the output directory) plus the optional --verbose and --delimiter flags.
# NOTE(review): the enclosing function header is not visible in this view,
# and the bare integers look like leftover original line numbers. The
# --verbose and --delimiter calls each appear to be missing a line between
# the flag name and help= (the '%(default)s' in the help text suggests a
# default= argument) -- TODO confirm against the original file.
description = ('Parse a Caffe training log into two CSV files '
180
'containing training and testing information')
181
parser = argparse.ArgumentParser(description=description)
183
parser.add_argument('logfile_path',
184
help='Path to log file')
186
parser.add_argument('output_dir',
187
help='Directory in which to place output CSV files')
189
parser.add_argument('--verbose',
191
help='Print some extra info (e.g., output filenames)')
193
parser.add_argument('--delimiter',
195
help=('Column delimiter in output files '
196
'(default: \'%(default)s\')'))
198
# Parse the process's command-line arguments into `args`.
args = parser.parse_args()
204
# Driver: parse the log named on the command line, then write the training
# and test tables as CSV files into the requested output directory.
# NOTE(review): the enclosing function header and the statement that binds
# `args` are not visible in this view; the bare integers look like leftover
# original line numbers.
train_dict_list, test_dict_list = parse_log(args.logfile_path)
205
save_csv_files(args.logfile_path, args.output_dir, train_dict_list,
206
test_dict_list, delimiter=args.delimiter, verbose=args.verbose)
209
if __name__ == '__main__':