skypilot

huggingface_glue_imdb_grid_search_app.py
45 строк · 1.7 Кб
Перенос по словам
1
"""Grid search version of huggingface_glue_imdb_app.py.

Launches a cluster named 'hgs' whose setup step installs HuggingFace
transformers and downloads the IMDB dataset, then submits one `run_glue.py`
fine-tuning job per learning rate to that cluster. Each job requests a single
V100 and writes its output to its own `run-<lr>.log` file.
"""
import sky

resources_to_launch = sky.Resources(sky.AWS(), accelerators={'V100': 4})
with sky.Dag() as dag:
    # Setup command, run once (pip, download dataset).
    common_setup = """\
        git clone https://github.com/huggingface/transformers/
        cd transformers
        pip3 install .
        cd examples/pytorch/text-classification
        pip3 install -r requirements.txt
        python3 -c 'import datasets; datasets.load_dataset("imdb")'"""
    sky.Task(setup=common_setup).set_resources(resources_to_launch)
# `detach_run` will only detach the `run` command. The provision and `setup` are
# still blocking.
sky.launch(dag, cluster_name='hgs', detach_run=True)

# Run command template shared by all trials. To be filled in: {lr}.
# Each `\\` below emits a literal backslash so the shell treats the flags as
# a continuation of the single `python3 run_glue.py` invocation.
run_format = """\
        cd transformers/examples/pytorch/text-classification
        python3 run_glue.py \\
            --learning_rate {lr} \\
            --output_dir /tmp/imdb-{lr}/ \\
            --model_name_or_path bert-base-cased \\
            --dataset_name imdb \\
            --do_train \\
            --max_seq_length 128 \\
            --per_device_train_batch_size 32 \\
            --max_steps 50 \\
            --fp16 --overwrite_output_dir 2>&1 | tee run-{lr}.log
        """

for lr in [1e-5, 2e-5, 3e-5, 4e-5]:
    per_trial_resources = sky.Resources(accelerators={'V100': 1})

    task = sky.Task(
        # A descriptive name.
        f'task-{lr}',
        # Run command for each task, with different lr.
        run=run_format.format(lr=lr)).set_resources(per_trial_resources)

    # Set 'stream_logs=False' to not mix all tasks' outputs together.
    # Each task's output is redirected to run-{lr}.log and can be tail-ed.
    sky.exec(task, cluster_name='hgs', stream_logs=False, detach_run=True)
46
skypilot

Использование cookies