""" Optimizer Tests

These tests were adapted from PyTorch's optimizer tests.

"""
import math
import pytest
import functools
from copy import deepcopy

import torch
from torch.testing._internal.common_utils import TestCase
from torch.nn import Parameter
from timm.scheduler import PlateauLRScheduler

from timm.optim import create_optimizer_v2

import importlib
import os

torch_backend = os.environ.get('TORCH_BACKEND')
if torch_backend is not None:
    importlib.import_module(torch_backend)
torch_device = os.environ.get('TORCH_DEVICE', 'cuda')
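# Illustrative usage only (not part of the original file): the two env vars above select the test device,
# e.g. `TORCH_DEVICE=cpu pytest <this test file>` to skip the device-specific paths, while TORCH_BACKEND
# names a module to import first (e.g. one that registers an out-of-tree PyTorch backend).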

# HACK relying on internal PyTorch test functionality for comparisons that I don't want to write
torch_tc = TestCase()

def _test_basic_cases_template(weight, bias, input, constructor, scheduler_constructors):
    weight = Parameter(weight)
    bias = Parameter(bias)
    input = Parameter(input)
    optimizer = constructor(weight, bias)
    schedulers = []
    for scheduler_constructor in scheduler_constructors:
        schedulers.append(scheduler_constructor(optimizer))

    # to check if the optimizer can be printed as a string
    optimizer.__repr__()

    def fn():
        optimizer.zero_grad()
        y = weight.mv(input)
        if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
            y = y.cuda(bias.get_device())
        loss = (y + bias).pow(2).sum()
        loss.backward()
        return loss

    initial_value = fn().item()
    for _i in range(200):
        for scheduler in schedulers:
            if isinstance(scheduler, PlateauLRScheduler):
                val_loss = fn()
                scheduler.step(val_loss)
            else:
                scheduler.step()
        optimizer.step(fn)

    assert fn().item() < initial_value

def _test_state_dict(weight, bias, input, constructor):
    weight = Parameter(weight)
    bias = Parameter(bias)
    input = Parameter(input)

    def fn_base(optimizer, weight, bias):
        optimizer.zero_grad()
        i = input_device if weight.device.type != 'cpu' else input
        loss = (weight.mv(i) + bias).pow(2).sum()
        loss.backward()
        return loss

    optimizer = constructor(weight, bias)
    fn = functools.partial(fn_base, optimizer, weight, bias)

    # Prime the optimizer
    for _i in range(20):
        optimizer.step(fn)
    # Clone the weights and construct new optimizer for them
    with torch.no_grad():
        weight_c = Parameter(weight.clone().detach())
        bias_c = Parameter(bias.clone().detach())
    optimizer_c = constructor(weight_c, bias_c)
    fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c)
    # Load state dict
    state_dict = deepcopy(optimizer.state_dict())
    state_dict_c = deepcopy(optimizer.state_dict())
    optimizer_c.load_state_dict(state_dict_c)

    # Run both optimizations in parallel
    for _i in range(20):
        optimizer.step(fn)
        optimizer_c.step(fn_c)
        torch_tc.assertEqual(weight, weight_c)
        torch_tc.assertEqual(bias, bias_c)
    # Make sure state dict is deterministic with equal but not identical parameters
    torch_tc.assertEqual(optimizer.state_dict(), optimizer_c.state_dict())
    # Make sure repeated parameters have identical representation in state dict
    optimizer_c.param_groups.extend(optimizer_c.param_groups)
    torch_tc.assertEqual(optimizer.state_dict()['param_groups'][-1], optimizer_c.state_dict()['param_groups'][-1])

    # Check that state dict can be loaded even when we cast parameters
    # to a different type and move to a different device.
    if torch_device == 'cpu':
        return
    elif torch_device == 'cuda' and not torch.cuda.is_available():
        return

    with torch.no_grad():
        input_device = Parameter(input.clone().detach().float().to(torch_device))
        weight_device = Parameter(weight.clone().detach().to(torch_device))
        bias_device = Parameter(bias.clone().detach().to(torch_device))
    optimizer_device = constructor(weight_device, bias_device)
    fn_device = functools.partial(fn_base, optimizer_device, weight_device, bias_device)

    state_dict = deepcopy(optimizer.state_dict())
    state_dict_c = deepcopy(optimizer.state_dict())
    optimizer_device.load_state_dict(state_dict_c)

    # Make sure state dict wasn't modified
    torch_tc.assertEqual(state_dict, state_dict_c)

    for _i in range(20):
        optimizer.step(fn)
        optimizer_device.step(fn_device)
        torch_tc.assertEqual(weight, weight_device)
        torch_tc.assertEqual(bias, bias_device)

    # validate deepcopy() copies all public attributes
    def getPublicAttr(obj):
        return set(k for k in obj.__dict__ if not k.startswith('_'))

    assert getPublicAttr(optimizer) == getPublicAttr(deepcopy(optimizer))

def _test_basic_cases(constructor, scheduler_constructors=None):
    if scheduler_constructors is None:
        scheduler_constructors = []
    _test_state_dict(
        torch.randn(10, 5),
        torch.randn(10),
        torch.randn(5),
        constructor
    )
    _test_basic_cases_template(
        torch.randn(10, 5),
        torch.randn(10),
        torch.randn(5),
        constructor,
        scheduler_constructors
    )
    # non-contiguous parameters
    _test_basic_cases_template(
        torch.randn(10, 5, 2)[..., 0],
        torch.randn(10, 2)[..., 0],
        torch.randn(5),
        constructor,
        scheduler_constructors
    )
    # CUDA
    if torch_device == 'cpu':
        return
    elif torch_device == 'cuda' and not torch.cuda.is_available():
        return

    _test_basic_cases_template(
        torch.randn(10, 5).to(torch_device),
        torch.randn(10).to(torch_device),
        torch.randn(5).to(torch_device),
        constructor,
        scheduler_constructors
    )

def _test_model(optimizer, params, device=torch.device('cpu')):
    weight = torch.tensor(
        [[-0.2109, -0.4976], [-0.1413, -0.3420], [-0.2524, 0.6976]],
        device=device, requires_grad=True)
    bias = torch.tensor([-0.1085, -0.2979, 0.6892], device=device, requires_grad=True)
    weight2 = torch.tensor([[-0.0508, -0.3941, -0.2843]], device=device, requires_grad=True)
    bias2 = torch.tensor([-0.0711], device=device, requires_grad=True)
    input = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], device=device).reshape(3, 2)

    model = torch.nn.Sequential(torch.nn.Linear(2, 3),
                                torch.nn.Sigmoid(),
                                torch.nn.Linear(3, 1),
                                torch.nn.Sigmoid())
    model.to(device)

    pretrained_dict = model.state_dict()
    pretrained_dict['0.weight'] = weight
    pretrained_dict['0.bias'] = bias
    pretrained_dict['2.weight'] = weight2
    pretrained_dict['2.bias'] = bias2
    model.load_state_dict(pretrained_dict)

    optimizer = create_optimizer_v2(model, opt=optimizer, **params)

    prev_loss = float('inf')
    for i in range(20):
        optimizer.zero_grad()
        output = model(input)
        loss = output.sum()
        loss.backward()
        loss = loss.item()
        assert loss < prev_loss
        prev_loss = loss
        optimizer.step()

def rosenbrock(tensor):
    x, y = tensor
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2

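# drosenbrock() below is the analytic gradient of rosenbrock() above:
#   d/dx = -400 * x * (y - x**2) - 2 * (1 - x),   d/dy = 200 * (y - x**2)
# Sanity check (not from the original file): drosenbrock(torch.tensor([1.5, 1.5])) -> tensor([451., -150.])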
def drosenbrock(tensor):
    x, y = tensor
    return torch.tensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))

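# _test_rosenbrock() below runs cyclic coordinate descent toward the Rosenbrock minimum at (1, 1),
# feeding the optimizer a sparse-then-densified gradient; it only asserts that the final distance to
# the minimum is no larger than the starting distance from (1.5, 1.5), not full convergence.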
def _test_rosenbrock(constructor, scheduler_constructors=None):
    if scheduler_constructors is None:
        scheduler_constructors = []
    params_t = torch.tensor([1.5, 1.5])

    params = Parameter(params_t)
    optimizer = constructor([params])
    schedulers = []
    for scheduler_constructor in scheduler_constructors:
        schedulers.append(scheduler_constructor(optimizer))

    solution = torch.tensor([1, 1])
    initial_dist = params.clone().detach().dist(solution)

    def eval(params, w):
        # Depending on w, provide only the x or y gradient
        optimizer.zero_grad()
        loss = rosenbrock(params)
        loss.backward()
        grad = drosenbrock(params.clone().detach())
        # NB: We torture test the optimizer by returning an
        # uncoalesced sparse tensor
        if w:
            i = torch.LongTensor([[0, 0]])
            x = grad[0]
            v = torch.tensor([x / 4., x - x / 4.])
        else:
            i = torch.LongTensor([[1, 1]])
            y = grad[1]
            v = torch.tensor([y - y / 4., y / 4.])
        x = torch.sparse.DoubleTensor(i, v, torch.Size([2])).to(dtype=v.dtype)
        with torch.no_grad():
            params.grad = x.to_dense()
        return loss

    for i in range(2000):
        # Do cyclic coordinate descent
        w = i % 2
        optimizer.step(functools.partial(eval, params, w))
        for scheduler in schedulers:
            if isinstance(scheduler, PlateauLRScheduler):
                scheduler.step(rosenbrock(params))
            else:
                scheduler.step()

    torch_tc.assertLessEqual(params.clone().detach().dist(solution), initial_dist)

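# Illustration (not part of the original tests) of what the two helpers below produce:
#   _build_params_dict(w, b, lr=1e-2)        -> [{'params': [w]}, {'params': [b], 'lr': 1e-2}]
#   _build_params_dict_single(w, b, lr=1e-2) -> [{'params': b, 'lr': 1e-2}]
# i.e. param-group style inputs used to exercise per-group overrides through create_optimizer_v2.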
def _build_params_dict(weight, bias, **kwargs):
    return [{'params': [weight]}, dict(params=[bias], **kwargs)]


def _build_params_dict_single(weight, bias, **kwargs):
    return [dict(params=bias, **kwargs)]

280#@pytest.mark.parametrize('optimizer', ['sgd', 'momentum'])
281# FIXME momentum variant frequently fails in GitHub runner, but never local after many attempts
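# The tests below drive every optimizer through timm's factory by name, e.g. (illustrative call, not
# taken from this file): create_optimizer_v2(model_or_params, opt='sgd', lr=1e-3, weight_decay=1e-4);
# the parametrize ids are the `opt` strings resolved by that factory.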
@pytest.mark.parametrize('optimizer', ['sgd'])
def test_sgd(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=1e-2),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=1e-2),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=1e-2), optimizer)
    )
    # _test_basic_cases(
    #     lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3),
    #     [lambda opt: StepLR(opt, gamma=0.9, step_size=10)]
    # )
    # _test_basic_cases(
    #     lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3),
    #     [lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="linear")]
    # )
    # _test_basic_cases(
    #     lambda weight, bias: optimizer([weight, bias], lr=1e-3),
    #     [lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4, warmup_method="constant")]
    # )
    # _test_basic_cases(
    #     lambda weight, bias: optimizer([weight, bias], lr=1e-3),
    #     [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
    #      lambda opt: WarmUpLR(opt, warmup_factor=0.4, warmup_iters=4)]
    # )
    # _test_basic_cases(
    #     lambda weight, bias: optimizer([weight, bias], lr=1e-3),
    #     [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
    #      lambda opt: ReduceLROnPlateau(opt)]
    # )
    # _test_basic_cases(
    #     lambda weight, bias: optimizer([weight, bias], lr=1e-3),
    #     [lambda opt: StepLR(opt, gamma=0.99, step_size=10),
    #      lambda opt: ExponentialLR(opt, gamma=0.99),
    #      lambda opt: ReduceLROnPlateau(opt)]
    # )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=3e-3, momentum=1)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=3e-3, momentum=1, weight_decay=.1)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )
    _test_model(optimizer, dict(lr=1e-3))

@pytest.mark.parametrize('optimizer', ['adamw', 'adam', 'nadam', 'adamax'])
def test_adam(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=5e-2)
    )
    _test_model(optimizer, dict(lr=5e-2))

@pytest.mark.parametrize('optimizer', ['adabelief'])
def test_adabelief(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3, weight_decay=1)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=5e-2)
    )
    _test_model(optimizer, dict(lr=5e-2))

@pytest.mark.parametrize('optimizer', ['radam', 'radabelief'])
def test_rectified(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )
    _test_model(optimizer, dict(lr=1e-3))

@pytest.mark.parametrize('optimizer', ['adadelta', 'adagrad'])
def test_adaother(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3, weight_decay=1)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-1)
    )
    _test_model(optimizer, dict(lr=5e-2))

@pytest.mark.parametrize('optimizer', ['adafactor'])
def test_adafactor(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(_build_params_dict_single(weight, bias), optimizer)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3, weight_decay=1)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=5e-2)
    )
    _test_model(optimizer, dict(lr=5e-2))

@pytest.mark.parametrize('optimizer', ['lamb', 'lambc'])
def test_lamb(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=1e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=1e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=1e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )
    _test_model(optimizer, dict(lr=1e-3))

@pytest.mark.parametrize('optimizer', ['lars', 'larc', 'nlars', 'nlarc'])
def test_lars(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=1e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=1e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=1e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )
    _test_model(optimizer, dict(lr=1e-3))

@pytest.mark.parametrize('optimizer', ['madgrad', 'madgradw'])
def test_madgrad(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-2)
    )
    _test_model(optimizer, dict(lr=1e-2))

@pytest.mark.parametrize('optimizer', ['novograd'])
def test_novograd(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )
    _test_model(optimizer, dict(lr=1e-3))

@pytest.mark.parametrize('optimizer', ['rmsprop', 'rmsproptf'])
def test_rmsprop(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-2)
    )
    _test_model(optimizer, dict(lr=1e-2))

@pytest.mark.parametrize('optimizer', ['adamp'])
def test_adamp(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=5e-2)
    )
    _test_model(optimizer, dict(lr=5e-2))

@pytest.mark.parametrize('optimizer', ['sgdp'])
def test_sgdp(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )
    _test_model(optimizer, dict(lr=1e-3))

@pytest.mark.parametrize('optimizer', ['lookahead_sgd', 'lookahead_momentum'])
def test_lookahead_sgd(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-3)
    )

@pytest.mark.parametrize('optimizer', ['lookahead_adamw', 'lookahead_adam'])
def test_lookahead_adam(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=5e-2)
    )

@pytest.mark.parametrize('optimizer', ['lookahead_radam'])
def test_lookahead_radam(optimizer):
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2([weight, bias], optimizer, lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3),
            optimizer,
            lr=1e-3)
    )
    _test_basic_cases(
        lambda weight, bias: create_optimizer_v2(
            _build_params_dict_single(weight, bias, lr=3e-3), optimizer)
    )
    _test_rosenbrock(
        lambda params: create_optimizer_v2(params, optimizer, lr=1e-4)
    )