google-research

Форк
0
59 строк · 1.6 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Download and prepare TFDS data."""
17

18
from absl import app
19
from absl import flags
20
import tensorflow_datasets as tfds
21

22
FLAGS = flags.FLAGS
23

24
# Command-line flag naming the directory the datasets are stored in; it stays
# None unless --data_dir is supplied.
flags.DEFINE_string(
    name='data_dir',
    default=None,
    help='Directory to store data.',
)
27

28

29
def main(_):
  """Downloads and prepares the German-English WMT corpora used here.

  Two `wmt_translate` configurations are created: `newscommentary`, which maps
  both the train and validation splits to the `newscommentary_v13` subset, and
  `paracrawl`, which maps only the train split to `paracrawl_v1`. A builder is
  created for each configuration with `data_dir` taken from the `--data_dir`
  flag, and `download_and_prepare` is then called on each builder in turn.

  Args:
    _: Positional argument supplied by the caller; ignored.
  """
  wmt = tfds.translate.wmt

  # (config name, split -> list of WMT subsets) for every corpus to prepare.
  corpora = (
      ('newscommentary', {
          tfds.Split.TRAIN: ['newscommentary_v13'],
          tfds.Split.VALIDATION: ['newscommentary_v13'],
      }),
      ('paracrawl', {
          tfds.Split.TRAIN: ['paracrawl_v1'],
      }),
  )

  # Create every config first, then every builder, and only then start the
  # downloads, so the order of operations matches a straight-line script.
  configs = [
      wmt.WmtConfig(
          version='1.0.0',
          language_pair=('de', 'en'),
          subsets=split_subsets,
          name=config_name)
      for config_name, split_subsets in corpora
  ]
  builders = [
      tfds.builder('wmt_translate', config=cfg, data_dir=FLAGS.data_dir)
      for cfg in configs
  ]
  for builder in builders:
    builder.download_and_prepare()
56

57

58
if __name__ == '__main__':
  # Hand control to absl, which calls main when the file is run as a script.
  app.run(main)
60

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.