paddlenlp

Форк
0
/
test_pretrain.py 
81 строка · 2.5 Кб
1
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
from __future__ import annotations
15

16
import shutil
17
import sys
18
import tempfile
19
import unittest
20

21
from parameterized import parameterized_class
22

23
from paddlenlp.utils.downloader import get_path_from_url
24
from tests.testing_utils import argv_context_guard, load_test_config
25

26
from .testing_utils import LLMTest
27

28

29
@parameterized_class(
    ["model_dir"],
    [
        ["llama"],
        ["qwen"],
        ["gpt"],
    ],
)
class PretrainTest(LLMTest, unittest.TestCase):
    """Smoke test that runs ``run_pretrain.main`` once per parameterized model.

    ``parameterized_class`` generates one test class per entry above and sets
    ``model_dir`` on it. Each run downloads a small pre-tokenized dataset into
    a temporary directory, launches pretraining with the config loaded from
    ``config_path``, and then runs the predictor on the result.
    """

    config_path: str = "./tests/fixtures/llm/pretrain.yaml"
    # Overwritten per generated class by ``parameterized_class``.
    model_dir: str = None

    def setUp(self) -> None:
        """Run the base-class setup and create a scratch dataset directory."""
        LLMTest.setUp(self)

        self.dataset_dir = tempfile.mkdtemp()
        # NOTE(review): root_path is presumably set by LLMTest.setUp -- confirm.
        self.model_codes_dir = self.root_path

    def tearDown(self) -> None:
        """Run the base-class teardown and always remove the dataset directory."""
        try:
            LLMTest.tearDown(self)
        finally:
            # Remove the temp dir even if the base-class teardown raises, so a
            # failing teardown does not leak the downloaded dataset on disk.
            shutil.rmtree(self.dataset_dir)

    def test_pretrain(self):
        """Pretrain on the downloaded dataset, then run the predictor."""
        # Evict a previously imported ``run_pretrain`` so the import below
        # executes the module afresh. ``pop`` with a default is a no-op when
        # the exact key is absent; the earlier substring scan followed by
        # ``del sys.modules["run_pretrain"]`` raised KeyError whenever only a
        # differently named module (e.g. a dotted path) matched.
        sys.modules.pop("run_pretrain", None)

        # Download the pre-tokenized ids and their index into the scratch dir.
        dataset_urls = (
            "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_ids.npy",
            "https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_idx.npz",
        )
        for url in dataset_urls:
            get_path_from_url(url, root_dir=self.dataset_dir)

        pretrain_config = load_test_config(self.config_path, "pretrain", self.model_dir)

        pretrain_config["input_dir"] = self.dataset_dir
        # NOTE(review): output_dir is presumably provided by LLMTest -- confirm.
        pretrain_config["output_dir"] = self.output_dir

        # The import happens inside the guard, after the eviction above, so it
        # runs while argv_context_guard(pretrain_config) is active.
        with argv_context_guard(pretrain_config):
            from run_pretrain import main

            main()

        # The ``inference_model=True`` predictor run currently only works for
        # llama, not for gpt or qwen.
        if self.model_dir == "llama":
            self.run_predictor({"inference_model": True})

        self.run_predictor({"inference_model": False})
82

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.