# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analyze the experimental results from the logs."""
17

18
import glob
import os

from absl import app
from absl import flags
import matplotlib.pyplot as plt
import numpy as np
24

25
FLAGS = flags.FLAGS

# Dataset whose logs are analyzed; main() globs
# "./logs/experiment_<dataset_name>*.log" to find them.
flags.DEFINE_string("dataset_name", "m5",
                    "Dataset to analyze the completed experiments for.")
# Experiments with too few parsed models are not visualized by main().
flags.DEFINE_integer("minimum_model_count", 10,
                     "Minimum model count for an experiment to visualize.")
31

32

33
def scrape_data_from_logs(log_file):
  """Scrapes the validation and test metrics data from the logs.

  A record starts with a line beginning with "Hyperparameters". The line
  right after it is stored verbatim as the hyperparameter description, and
  the line after that is split on commas: its second-to-last field is the
  validation metric and its last field (with a trailing "]" removed) is the
  test metric.

  Records whose metrics line is missing or has fewer than two fields (for
  example the tail of a log that is still being written) are skipped instead
  of raising IndexError.

  Args:
    log_file: Path of the log file to parse.

  Returns:
    A tuple (val_metrics, test_metrics, hyperparameters): two NumPy arrays
    with one entry per complete record, and a list holding the hyperparameter
    line of each of those records.

  Raises:
    ValueError: If a metric field of a complete record is not a number.
  """
  val_metrics = []
  test_metrics = []
  hyperparameters = []

  with open(log_file, "r") as myfile:
    lines = myfile.read().split("\n")

  for ind, line in enumerate(lines):
    if not line.startswith("Hyperparameters"):
      continue

    # The record needs two more lines; a header at the very end of the file
    # belongs to a record that has not been fully written.
    if ind + 2 >= len(lines):
      continue

    fields = lines[ind + 2].split(",")
    # An empty or partial metrics line cannot provide both metrics.
    if len(fields) < 2:
      continue

    # Parse both values before storing anything so the three outputs always
    # stay aligned.
    val_metric = float(fields[-2].strip(" "))
    test_metric = float(fields[-1].strip(" ").strip("]"))

    val_metrics.append(val_metric)
    test_metrics.append(test_metric)
    hyperparameters.append(lines[ind + 1])

  return np.asarray(val_metrics), np.asarray(test_metrics), hyperparameters
55

56

57
def display_metrics(val_metrics, test_metrics, title, performance_threshold,
                    filename):
  """Displays the metrics of the trained models so far.

  Draws a scatter plot of test metric against validation metric, adds a
  dashed least-squares line when more than two models remain after
  filtering, and saves the figure to `filename`. Nothing is drawn or saved
  when no model remains after filtering.

  Args:
    val_metrics: Validation metrics, one value per model.
    test_metrics: Test metrics, aligned with `val_metrics`.
    title: Title of the plot.
    performance_threshold: Models whose validation metric is not strictly
      below this value are treated as outliers and dropped.
    filename: Path the figure is saved to.
  """
  val_metrics = np.asarray(val_metrics)
  test_metrics = np.asarray(test_metrics)

  # Remove the outliers. Models with a non-finite test metric are dropped
  # too: they would turn the axis limits below into NaN/Inf, which
  # matplotlib rejects. A NaN validation metric already fails the
  # comparison against the threshold.
  keep = (val_metrics < performance_threshold) & np.isfinite(test_metrics)
  val_metrics = val_metrics[keep]
  test_metrics = test_metrics[keep]

  if val_metrics.size == 0:
    return

  fig = plt.figure()
  try:
    plt.plot(val_metrics, test_metrics, "o")
    if val_metrics.size > 2:
      # Linear trend of the test metric as a function of the validation one.
      m, b = np.polyfit(val_metrics, test_metrics, 1)
      plt.plot(val_metrics, m * val_metrics + b, "k--")

    # Use the same range on both axes so the plot stays square.
    v_min = np.min([np.min(val_metrics), np.min(test_metrics)]) * 0.8
    v_max = np.max([np.max(val_metrics), np.max(test_metrics)]) * 1.0
    plt.xlabel("Validation")
    plt.ylabel("Test")
    plt.title(title)
    plt.xlim([v_min, v_max])
    plt.ylim([v_min, v_max])
    plt.gca().set_aspect("equal", adjustable="box")
    plt.savefig(filename)
  finally:
    # pyplot keeps every figure alive until it is closed explicitly; release
    # it so that plotting many experiments does not accumulate figures.
    plt.close(fig)
82

83

84
def main(args):
  """Main function to iterate over the experiments.

  Finds the logs matching "./logs/experiment_<dataset_name>*.log", scrapes
  the metrics of each one and, for every experiment with at least
  `minimum_model_count` parsed models, prints its name and saves a
  validation-vs-test plot to "<experiment name>.png" in the current working
  directory.

  Args:
    args: Positional command-line arguments passed by absl; unused.
  """

  del args  # Not used.

  log_files = glob.glob("./logs/experiment_" + str(FLAGS.dataset_name) +
                        "*.log")

  # glob returns the files in arbitrary order; sort for reproducible output.
  for log_file in sorted(log_files):
    # Strip the directory and only the final extension, so that experiment
    # names containing dots (e.g. "lr0.1") are kept intact and unique.
    experiment_name = os.path.splitext(os.path.basename(log_file))[0]

    val_metrics, test_metrics, _ = scrape_data_from_logs(log_file)

    # The flag is a minimum, so an experiment with exactly that many models
    # qualifies.
    if len(val_metrics) < FLAGS.minimum_model_count:
      continue

    print("------------------------------------")
    print("Experiment name:")
    print(experiment_name)
    # Title the plot with the experiment name and give each experiment its
    # own output file; validation metrics of 1000 or more are dropped as
    # outliers by display_metrics.
    display_metrics(val_metrics, test_metrics, experiment_name, 1000,
                    experiment_name + ".png")


if __name__ == "__main__":
  app.run(main)
108
google-research

Использование cookies