paddlenlp
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
15import csv16import os17import tarfile18import zipfile19from typing import Callable, Iterable20
21from ppfleetx.distributed.apis import env22
23
# NOTE(review): presumably restricts the call to local rank 0; the decorator
# comes from ppfleetx.distributed.apis.env and is not visible here -- confirm.
@env.work_at_local_rank0
def unzip(zip_path, mode="r", out_dir=None, delete=False):
    """Extract every member of a zip archive, then optionally remove the archive.

    Args:
        zip_path: Path of the zip file to open.
        mode: Mode string handed to ``zipfile.ZipFile``.
        out_dir: Destination directory forwarded to ``extractall``; ``None``
            leaves the choice to ``zipfile`` (the current working directory).
        delete: When true, remove ``zip_path`` after extraction has succeeded.
            If opening or extracting raises, the archive is left in place.
    """
    archive = zipfile.ZipFile(zip_path, mode)
    with archive:
        archive.extractall(path=out_dir)

    if not delete:
        return
    os.remove(zip_path)
32
# NOTE(review): presumably restricts the call to local rank 0; the decorator
# comes from ppfleetx.distributed.apis.env and is not visible here -- confirm.
@env.work_at_local_rank0
def untar(tar_path, mode="r:gz", out_dir=None, delete=False):
    """Extract every member of a tar archive, then optionally remove the archive.

    Args:
        tar_path: Path of the tar file to open.
        mode: Mode string handed to ``tarfile.open``. Defaults to ``"r:gz"``
            (gzip-compressed archive); pass e.g. ``"r:"`` or ``"r:*"`` for
            other archive flavours.
        out_dir: Destination directory forwarded to ``extractall``; ``None``
            leaves the choice to ``tarfile`` (the current working directory).
        delete: When true, remove ``tar_path`` once extraction has ended,
            whether it succeeded or raised.
    """
    try:
        # Honour the caller's ``mode`` instead of always forcing "r:gz".
        # NOTE(review): the tarfile documentation warns against extracting
        # archives from untrusted sources without prior inspection.
        with tarfile.open(tar_path, mode) as archive:
            archive.extractall(out_dir)
    finally:
        # Cleanup lives in ``finally`` so that a requested delete also happens
        # when opening or extracting the archive fails.
        if delete:
            os.remove(tar_path)
42
def _as_callables(funcs, arg_name):
    """Normalize ``funcs`` (None, one callable, or an iterable) to a tuple."""
    if funcs is None:
        return ()
    # Iterable is tested before Callable, so an object that is both is
    # treated as a collection of functions.
    if isinstance(funcs, Iterable):
        # Materialize once: a generator/iterator would otherwise be exhausted
        # after the first row and silently skipped for every later row.
        return tuple(funcs)
    if isinstance(funcs, Callable):
        return (funcs,)
    raise TypeError(
        f"{arg_name} must be a callable or an iterable of callables, got {type(funcs).__name__}"
    )


def parse_csv(
    path, skip_lines=0, delimiter=" ", quotechar="|", quoting=csv.QUOTE_NONE, map_funcs=None, filter_funcs=None
):
    """Read a delimited text file into a list of (optionally transformed) rows.

    Args:
        path: Path of the file to read; opened with ``newline=""`` as the
            ``csv`` module expects.
        skip_lines: Number of leading rows (as produced by ``csv.reader``) to
            discard before any filtering or mapping.
        delimiter: Field delimiter forwarded to ``csv.reader``.
        quotechar: Quote character forwarded to ``csv.reader``.
        quoting: Quoting mode forwarded to ``csv.reader``.
        map_funcs: ``None``, a single callable, or an iterable of callables.
            Each is applied in order to the row, the result of one feeding
            the next.
        filter_funcs: ``None``, a single callable, or an iterable of
            callables. Each receives the raw row (before mapping).

    Returns:
        A list with one entry per kept row: the list of fields from
        ``csv.reader``, or whatever the last map function returned.

    Raises:
        TypeError: If ``map_funcs`` or ``filter_funcs`` is neither callable
            nor iterable.
    """
    filters = _as_callables(filter_funcs, "filter_funcs")
    mappers = _as_callables(map_funcs, "map_funcs")

    data = []
    with open(path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar, quoting=quoting)
        for idx, row in enumerate(reader):
            if idx < skip_lines:
                continue

            # A row is dropped only when a filter returns exactly ``False``
            # (identity check); other falsy results such as ``None`` keep it.
            # ``any`` stops at the first rejecting filter.
            if any(func(row) is False for func in filters):
                continue

            for func in mappers:
                row = func(row)
            data.append(row)
    return data