optuna
215 строк · 7.6 Кб
1from __future__ import annotations
2
3import pandas as pd
4import pytest
5
6from optuna import create_study
7from optuna import create_trial
8from optuna import Trial
9from optuna.testing.storages import STORAGE_MODES
10from optuna.testing.storages import StorageSupplier
11from optuna.trial import TrialState
12
13
def test_study_trials_dataframe_with_no_trials() -> None:
    """A freshly created study with no trials exports an empty dataframe."""
    empty_study = create_study()
    assert empty_study.trials_dataframe().empty
18
19
@pytest.mark.parametrize("storage_mode", STORAGE_MODES)
@pytest.mark.parametrize(
    "attrs",
    [
        (
            "number",
            "value",
            "datetime_start",
            "datetime_complete",
            "params",
            "user_attrs",
            "system_attrs",
            "state",
        ),
        (
            "number",
            "value",
            "datetime_start",
            "datetime_complete",
            "duration",
            "params",
            "user_attrs",
            "system_attrs",
            "state",
            "intermediate_values",
            "_trial_id",
            "distributions",
        ),
    ],
)
@pytest.mark.parametrize("multi_index", [True, False])
def test_trials_dataframe(storage_mode: str, attrs: tuple[str, ...], multi_index: bool) -> None:
    """Check the columns and cell values of ``trials_dataframe`` for completed trials.

    Three trials are run with a constant objective, then the exported dataframe
    is checked for the expected number of rows and columns, the presence of the
    optional columns selected by ``attrs``, and the per-trial values.

    Args:
        storage_mode: Storage backend name handed to ``StorageSupplier``.
        attrs: Trial attributes requested from ``trials_dataframe``.
        multi_index: Whether the dataframe is requested with multi-index columns
            (``("params", "x")``) or flat ones (``"params_x"``).
    """

    def f(trial: Trial) -> float:
        x = trial.suggest_int("x", 1, 1)
        y = trial.suggest_categorical("y", (2.5,))
        trial.set_user_attr("train_loss", 3)
        trial.storage.set_trial_system_attr(trial._trial_id, "foo", "bar")
        value = x + y  # 3.5

        # Test reported intermediate values, although it in practice is not "intermediate".
        trial.report(value, step=0)

        return value

    def column(name: str, key: str = "") -> tuple[str, str] | str:
        # Build the column label in whichever layout the dataframe was requested.
        if multi_index:
            return (name, key)
        return f"{name}_{key}" if key else name

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage)
        study.optimize(f, n_trials=3)
        df = study.trials_dataframe(attrs=attrs, multi_index=multi_index)
        # Change index to access rows via trial number.
        if multi_index:
            df.set_index(("number", ""), inplace=True, drop=False)
        else:
            df.set_index("number", inplace=True, drop=False)
        assert len(df) == 3

        # Each requested attr contributes one column, except "params" and
        # "distributions", which expand to one column per parameter (x and y)
        # and therefore add one extra column each.
        # That gives 9 columns for the short attrs tuple (8 attrs + 1) and
        # 14 for the full one (12 attrs + 2).
        expected_n_columns = len(attrs)
        if "params" in attrs:
            expected_n_columns += 1
        if "distributions" in attrs:
            expected_n_columns += 1
        assert len(df.columns) == expected_n_columns

        # Column-presence checks do not depend on the row, so run them once
        # instead of once per trial.
        if "distributions" in attrs:
            assert column("distributions", "x") in df.columns
            assert column("distributions", "y") in df.columns
        if "_trial_id" in attrs:
            assert column("trial_id") in df.columns  # trial_id depends on other tests.
        if "duration" in attrs:
            assert column("duration") in df.columns

        for i in range(3):
            assert df.number[i] == i
            assert df.state[i] == "COMPLETE"
            assert df.value[i] == 3.5
            assert isinstance(df.datetime_start[i], pd.Timestamp)
            assert isinstance(df.datetime_complete[i], pd.Timestamp)

            if multi_index:
                assert df.params.x[i] == 1
                assert df.params.y[i] == 2.5
                assert df.user_attrs.train_loss[i] == 3
                assert df.system_attrs.foo[i] == "bar"
            else:
                assert df.params_x[i] == 1
                assert df.params_y[i] == 2.5
                assert df.user_attrs_train_loss[i] == 3
                assert df.system_attrs_foo[i] == "bar"
122
123
@pytest.mark.parametrize("storage_mode", STORAGE_MODES)
def test_trials_dataframe_with_failure(storage_mode: str) -> None:
    """Check that failed trials are still exported by ``trials_dataframe``.

    The objective always raises ``ValueError``, which ``study.optimize`` is told
    to catch, so all three trials end in the ``FAIL`` state. The dataframe is
    then checked for the expected columns, a ``None`` objective value, and the
    parameters and user attribute set before the failure.

    Args:
        storage_mode: Storage backend name handed to ``StorageSupplier``.
    """

    def f(trial: Trial) -> float:
        # Suggest the parameters and set the user attr before failing; the
        # assertions below read them back from the failed trials.
        trial.suggest_int("x", 1, 1)
        trial.suggest_categorical("y", (2.5,))
        trial.set_user_attr("train_loss", 3)
        raise ValueError()

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage)
        study.optimize(f, n_trials=3, catch=(ValueError,))
        df = study.trials_dataframe()
        # Change index to access rows via trial number.
        df.set_index("number", inplace=True, drop=False)
        assert len(df) == 3
        # non-nested: 6, params: 2, user_attrs: 1 system_attrs: 0
        assert len(df.columns) == 9
        for i in range(3):
            assert df.number[i] == i
            assert df.state[i] == "FAIL"
            assert df.value[i] is None
            assert isinstance(df.datetime_start[i], pd.Timestamp)
            assert isinstance(df.datetime_complete[i], pd.Timestamp)
            assert isinstance(df.duration[i], pd.Timedelta)
            assert df.params_x[i] == 1
            assert df.params_y[i] == 2.5
            assert df.user_attrs_train_loss[i] == 3
152
153
@pytest.mark.parametrize("attrs", [("value",), ("values",)])
@pytest.mark.parametrize("multi_index", [True, False])
def test_trials_dataframe_with_multi_objective_optimization(
    attrs: tuple[str, ...], multi_index: bool
) -> None:
    """Check how a two-objective study exports its objective values.

    A single trial returning ``(3, 5)`` is run. The ``values`` columns are
    checked first keyed by objective position, then keyed by the names given
    to ``set_metric_names``, in both the multi-index and flat column layouts.

    Args:
        attrs: Trial attributes requested from ``trials_dataframe``.
        multi_index: Whether multi-index columns are requested.
    """

    def objective(trial: Trial) -> tuple[float, float]:
        x = trial.suggest_float("x", 1, 1)
        y = trial.suggest_float("y", 2, 2)
        first = x + y  # 3
        second = x**2 + y**2  # 5
        return first, second

    study = create_study(directions=["minimize", "maximize"])
    study.optimize(objective, n_trials=1)

    # Before set_metric_names(): objectives are addressed by position.
    frame = study.trials_dataframe(attrs=attrs, multi_index=multi_index)
    if not multi_index:
        assert frame["values_0"][0] == 3
        assert frame["values_1"][0] == 5
    else:
        values = frame.get("values")
        assert values[0][0] == 3
        assert values[1][0] == 5

    # After set_metric_names(): objectives are addressed by metric name.
    study.set_metric_names(["v0", "v1"])
    frame = study.trials_dataframe(attrs=attrs, multi_index=multi_index)
    if not multi_index:
        assert frame.get("values_v0")[0] == 3
        assert frame.get("values_v1")[0] == 5
    else:
        values = frame.get("values")
        assert values["v0"][0] == 3
        assert values["v1"][0] == 5
185
186
@pytest.mark.parametrize("attrs", [("value",), ("values",)])
@pytest.mark.parametrize("multi_index", [True, False])
def test_trials_dataframe_with_multi_objective_optimization_with_fail_and_pruned(
    attrs: tuple[str, ...], multi_index: bool
) -> None:
    """Check the ``values`` columns when no trial produced objective values.

    One ``FAIL`` and one ``PRUNED`` trial are added to a two-objective study.
    Every ``values`` cell of both trials is asserted to be ``None``, first with
    positional objective keys and then with the names given to
    ``set_metric_names``.

    Args:
        attrs: Trial attributes requested from ``trials_dataframe``.
        multi_index: Whether multi-index columns are requested.
    """
    study = create_study(directions=["minimize", "maximize"])
    study.add_trial(create_trial(state=TrialState.FAIL))
    study.add_trial(create_trial(state=TrialState.PRUNED))
    n_trials = 2
    df = study.trials_dataframe(attrs=attrs, multi_index=multi_index)

    # without set_metric_names()
    if multi_index:
        for i in range(n_trials):
            assert df.get("values")[0][i] is None
            assert df.get("values")[1][i] is None
    else:
        for i in range(n_trials):
            assert df.values_0[i] is None
            assert df.values_1[i] is None

    # with set_metric_names()
    study.set_metric_names(["v0", "v1"])
    df = study.trials_dataframe(attrs=attrs, multi_index=multi_index)
    # Check both rows here as well, so the PRUNED trial is also covered when
    # metric names are set.
    if multi_index:
        for i in range(n_trials):
            assert df.get("values")["v0"][i] is None
            assert df.get("values")["v1"][i] is None
    else:
        for i in range(n_trials):
            assert df.get("values_v0")[i] is None
            assert df.get("values_v1")[i] is None
216