paddlenlp

Форк
0
/
test_bert.py 
97 строк · 3.2 Кб
1
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14

15
from __future__ import annotations
16

17
import os
18
import sys
19
from unittest import TestCase
20

21
from paddlenlp.utils.downloader import get_path_from_url_with_filelock
22
from paddlenlp.utils.log import logger
23
from tests.testing_utils import argv_context_guard, load_test_config
24

25

26
class BERT_Test(TestCase):
    """End-to-end tests driving the scripts located in ``./model_zoo/bert``.

    ``setUp`` places that directory at the front of ``sys.path`` so the scripts
    can be imported by bare module name inside each test, and ``tearDown``
    removes the entry again.
    """

    def download_corpus(self, input_dir):
        """Fetch the pretraining corpus into ``input_dir`` unless already present.

        Args:
            input_dir: Directory receiving the corpus; created when missing.
        """
        os.makedirs(input_dir, exist_ok=True)
        corpus_urls = [
            "https://bj.bcebos.com/paddlenlp/models/transformers/bert/data/training_data.hdf5",
        ]

        for url in corpus_urls:
            # The last URL segment doubles as the local file name.
            file_name = url.rsplit("/", 1)[-1]
            if os.path.exists(os.path.join(input_dir, file_name)):
                continue
            logger.info(f"start to download corpus: <{file_name}> into <{input_dir}>")
            get_path_from_url_with_filelock(url, root_dir=input_dir)

    def setUp(self) -> None:
        """Record the script/config locations and make the scripts importable."""
        self.config_path = "./tests/fixtures/model_zoo/bert.yaml"
        self.path = "./model_zoo/bert"
        sys.path.insert(0, self.path)

    def tearDown(self) -> None:
        """Drop the ``sys.path`` entry that ``setUp`` inserted."""
        sys.path.remove(self.path)

    def test_pretrain(self):
        """Pretrain, export the result, then run both prediction scripts on it."""
        # 1. run pretrain
        pretrain_config = load_test_config(self.config_path, "pretrain")
        self.download_corpus(pretrain_config["input_dir"])
        with argv_context_guard(pretrain_config):
            # Each script is imported while its guard is active.
            # NOTE(review): presumably the scripts read their options from
            # argv supplied by the guard -- confirm against testing_utils.
            from run_pretrain_trainer import do_train as run_pretraining

            run_pretraining()

        # 2. export model
        exported_model_path = "infer_model/model"
        model_type = pretrain_config["model_type"]
        export_config = {
            "model_type": model_type,
            "model_path": pretrain_config["output_dir"],
            "output_path": exported_model_path,
        }
        with argv_context_guard(export_config):
            from export_model import main as export_main

            export_main()

        # 3. infer model of glue
        glue_config = load_test_config(self.config_path, "glue")
        glue_predict_config = {
            "model_type": model_type,
            "model_path": exported_model_path,
            "task_name": glue_config["task_name"],
        }
        with argv_context_guard(glue_predict_config):
            from predict_glue import main as predict_glue_main

            predict_glue_main()

        # infer model of samples
        sample_predict_config = {
            "model_path": exported_model_path,
            "device": pretrain_config["device"],
        }
        with argv_context_guard(sample_predict_config):
            from predict import main as predict_main

            predict_main()

    def test_glue(self):
        """Run the GLUE trainer script with the ``glue`` section of the test config."""
        finetune_config = load_test_config(self.config_path, "glue")
        with argv_context_guard(finetune_config):
            from run_glue_trainer import do_train as run_glue_training

            run_glue_training()
98

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.