simpletransformers

Форк
0
105 строк · 3.3 Кб
1
import json
2
import logging
3
import os
4

5
from tqdm.auto import tqdm
6

7
from simpletransformers.question_answering import QuestionAnsweringModel
8

9
# Keep the "transformers" library quiet (warnings and above only) while the
# script itself logs at INFO level through the root logger.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)
12

13
# Create dummy data to use for training.
#
# The contexts are assembled from adjacent string literals rather than a
# backslash continuation inside one literal: a continuation pulls the source
# indentation into the runtime string, which silently shifts every character
# offset that follows it.
_FIRST_CONTEXT = "This is the first context"
_LEGISLATION_CONTEXT = (
    "Other legislation followed, including the Migratory Bird Conservation "
    "Act of 1929, a 1937 treaty prohibiting the hunting of right and gray "
    "whales, and the Bald Eagle Protection Act of 1940. These later laws had "
    "a low cost to society—the species were relatively rare—and little "
    "opposition was raised"
)


def _answer(context, text):
    """Return an answer dict whose ``answer_start`` is located in *context*.

    Deriving the offset with ``str.index`` guarantees that
    ``context[answer_start:answer_start + len(text)] == text``; a hand-counted
    offset can drift whenever the context literal is reformatted.

    Raises:
        ValueError: if *text* does not occur in *context*.
    """
    return {"text": text, "answer_start": context.index(text)}


train_data = [
    {
        "context": _FIRST_CONTEXT,
        "qas": [
            {
                "id": "00001",
                "is_impossible": False,
                "question": "Which context is this?",
                "answers": [_answer(_FIRST_CONTEXT, "the first")],
            }
        ],
    },
    {
        "context": _LEGISLATION_CONTEXT,
        "qas": [
            {
                "id": "00002",
                "is_impossible": False,
                "question": "What was the cost to society?",
                "answers": [_answer(_LEGISLATION_CONTEXT, "low cost")],
            },
            {
                "id": "00003",
                "is_impossible": False,
                "question": "What was the name of the 1937 treaty?",
                "answers": [
                    _answer(_LEGISLATION_CONTEXT, "Bald Eagle Protection Act")
                ],
            },
            {
                # Unanswerable question: no answer span is provided.
                "id": "00004",
                "is_impossible": True,
                "question": "How did Alexandar Hamilton die?",
                "answers": [],
            },
        ],
    },
]
51

52
# Repeat the dummy entries in place so the list is 2**20 times its current
# length -- the same result as doubling it 20 times in a loop. The list holds
# repeated references to the same dicts, not copies.
# NOTE(review): presumably this exists to produce a file large enough to
# exercise lazy loading -- confirm before shrinking it.
train_data *= 2 ** 20
54

55
# Save as a JSON file (the whole dataset as a single JSON array).
os.makedirs("data", exist_ok=True)
with open("data/train.json", "w", encoding="utf-8") as f:
    json.dump(train_data, f)

# Save as a JSONL file (one JSON object per line), with a progress bar.
# The encoding is given explicitly so the output does not depend on the
# platform's default locale.
with open("data/train.jsonl", "w", encoding="utf-8") as outfile:
    for entry in tqdm(train_data):
        outfile.write(json.dumps(entry) + "\n")
65

66
# Arguments passed to QuestionAnsweringModel via ``args=``.
# Every key is listed exactly once. A repeated key in a dict literal is not an
# error in Python -- the last value silently wins -- so the earlier
# "n_best_size": 3 and the second "no_save": True entries were dead and have
# been dropped; the effective values (10 and True) are unchanged.
train_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 10000,
    "train_batch_size": 8,
    "num_train_epochs": 1,
    # Optional settings, disabled by default:
    # 'wandb_project': 'test-new-project',
    # "use_early_stopping": True,
    "n_best_size": 10,
    "fp16": False,
    "no_save": True,
    "manual_seed": 4,
    "max_seq_length": 512,
    "lazy_loading": True,
    # "use_multiprocessing": False,
}
85

86
# Create the QuestionAnsweringModel.
# NOTE(review): use_cuda=True with cuda_device=0 requests the first CUDA GPU;
# on a machine without one this presumably needs use_cuda=False -- confirm
# against the simpletransformers documentation.
model = QuestionAnsweringModel(
    "bert",
    "bert-base-cased",
    args=train_args,
    use_cuda=True,
    cuda_device=0,
)

# Train the model from the JSONL file (one example per line); the plain JSON
# copy of the same data is passed as the evaluation set.
model.train_model("data/train.jsonl", eval_data="data/train.json")
93

94
# Making predictions using the model.
# The context is assembled from adjacent string literals; a backslash
# continuation inside a single literal would embed the source indentation in
# the text handed to the model.
to_predict = [
    {
        "context": (
            "Other legislation followed, including the Migratory Bird "
            "Conservation Act of 1929, a 1937 treaty prohibiting the hunting "
            "of right and gray whales, and the Bald Eagle Protection Act of "
            "1940. These later laws had a low cost to society—the species "
            "were relatively rare—and little opposition was raised"
        ),
        "qas": [{"question": "What was the name of the 1937 treaty?", "id": "0"}],
    }
]
102

103
# Run inference on the sample question, asking for the 2 best candidates,
# and show whatever predict() returns.
predictions = model.predict(to_predict, n_best_size=2)
print(predictions)
104

105
# flake8: noqa
106

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.