google-research
59 строк · 1.6 Кб
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
"""Download and prepare WMT de-en News Commentary and ParaCrawl TFDS data."""
17
18from absl import app19from absl import flags20import tensorflow_datasets as tfds21
# Shared absl flag container; `FLAGS.data_dir` is read inside `main`.
FLAGS = flags.FLAGS

# Directory handed to the TFDS builders as `data_dir`; defaults to None.
flags.DEFINE_string('data_dir', None, 'Directory to store data.')
28
def main(_):
  """Downloads and prepares two German-English `wmt_translate` datasets.

  Creates one WMT config for News Commentary v13 (train and validation
  splits) and one for ParaCrawl v1 (train split only), instantiates a TFDS
  builder for each with `FLAGS.data_dir` as the data directory, and then
  calls `download_and_prepare()` on the News Commentary builder followed by
  the ParaCrawl builder.

  Args:
    _: Unused positional argument supplied by the caller (e.g. `app.run`).
  """
  dataset_version = '1.0.0'
  de_en = ('de', 'en')

  news_subsets = {
      tfds.Split.TRAIN: ['newscommentary_v13'],
      tfds.Split.VALIDATION: ['newscommentary_v13'],
  }
  crawl_subsets = {
      tfds.Split.TRAIN: ['paracrawl_v1'],
  }

  wmt_configs = [
      tfds.translate.wmt.WmtConfig(
          version=dataset_version,
          language_pair=de_en,
          subsets=news_subsets,
          name='newscommentary'),
      tfds.translate.wmt.WmtConfig(
          version=dataset_version,
          language_pair=de_en,
          subsets=crawl_subsets,
          name='paracrawl'),
  ]

  # Construct every builder up front so that a failure while creating any of
  # them surfaces before a download has started.
  builders = [
      tfds.builder('wmt_translate', config=cfg, data_dir=FLAGS.data_dir)
      for cfg in wmt_configs
  ]

  # Prepare in list order: News Commentary first, then ParaCrawl.
  for dataset_builder in builders:
    dataset_builder.download_and_prepare()
57
if __name__ == '__main__':
  # Script entry point: hand control to absl, which invokes `main`.
  app.run(main)