google-research

Форк
0
/
preprocess_main.py 
48 строк · 1.5 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Preprocesses a specific split of the CFQ dataset."""
17

18
from absl import app
19
from absl import flags
20

21
from cfq import preprocess as preprocessor
22

23
FLAGS = flags.FLAGS

# Dataset selector; main() forwards it to preprocessor.get_dataset_from_tfds.
flags.DEFINE_string('dataset', None,
                    'Name of the TFDS dataset. Use cfq or scan.')

# Split selector; main() forwards it to preprocessor.get_dataset_from_tfds
# together with --dataset. The previous help text was garbled (a word was
# missing) and described a JSON file path, which does not match that usage.
flags.DEFINE_string('split', None,
                    'Name of the dataset split to preprocess.')

# Output location; main() passes it to the dataset writer and to the token
# vocabulary reader/writer.
flags.DEFINE_string('save_path', None, 'Path to the directory where to '
                    'save the files to.')

# Only --save_path is enforced at startup; --dataset and --split keep their
# None defaults when omitted.
flags.mark_flag_as_required('save_path')
35

36

37
def main(argv):
  """Runs the preprocessing steps configured by the command-line flags.

  Fetches the dataset selected by --dataset and --split, writes it to
  --save_path, then obtains a token vocabulary from --save_path and hands it
  to the vocabulary writer with the same path.

  Args:
    argv: Command-line arguments passed in by app.run; only the program name
      is expected.

  Raises:
    app.UsageError: If more than one argument is present in argv.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  dataset = preprocessor.get_dataset_from_tfds(FLAGS.dataset, FLAGS.split)
  preprocessor.write_dataset(dataset, FLAGS.save_path)
  # The vocabulary is requested from save_path only after the dataset has been
  # written there, so the order of these calls must be kept.
  token_vocab = preprocessor.get_token_vocab(FLAGS.save_path)
  preprocessor.write_token_vocab(token_vocab, FLAGS.save_path)
45

46

47
if __name__ == '__main__':
  # Let absl parse the command-line flags and then invoke main.
  app.run(main)
49

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.