google-research
48 строк · 1.4 Кб
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Process csv data to tfrecords."""
17
18import os19
20from absl import app21from absl import flags22import tensorflow.compat.v1 as tf23
24from protein_lm import data25
FLAGS = flags.FLAGS

# Command-line flags. Both default to the empty string; main() rejects an
# empty value for either one before doing any work.
flags.DEFINE_string(
    name='input_dir',
    default='',
    help='Directory to load CSVs from.',
)
flags.DEFINE_string(
    name='output_dir',
    default='',
    help='Directory to output tfrecords to.',
)
33
def main(argv):
  """Runs `data.csv_to_tfrecord` on every `*.csv` file in --input_dir.

  Args:
    argv: Positional command-line arguments forwarded by `app.run`; unused.

  Raises:
    ValueError: If --input_dir or --output_dir is empty.
  """
  del argv  # Unused.

  if not FLAGS.input_dir:
    raise ValueError('Must provide input directory.')
  if not FLAGS.output_dir:
    raise ValueError('Must provide output directory.')

  # Glob returns paths that already carry the input_dir prefix, so they are
  # used as-is. Joining them onto input_dir a second time duplicates the
  # prefix for relative or URI-style (e.g. gs://) directories.
  csv_paths = tf.gfile.Glob(os.path.join(FLAGS.input_dir, '*.csv'))
  tf.gfile.MakeDirs(FLAGS.output_dir)

  total = len(csv_paths)
  for idx, csv_path in enumerate(csv_paths):
    print(csv_path)
    data.csv_to_tfrecord(csv_path, FLAGS.output_dir, idx=idx, total=total)

# Script entry point: let absl parse the command-line flags, then call main().
if __name__ == '__main__':
  app.run(main)