pytorch

Форк
0
/
file_io_utils.py 
101 строка · 2.9 Кб
1
import json
2
import re
3
import shutil
4
from pathlib import Path
5
from typing import Any, List
6

7
import boto3  # type: ignore[import]
8

9

10
def zip_folder(folder_to_zip: Path, dest_file_base_name: Path) -> Path:
    """
    Zips the contents of folder_to_zip and returns the path to the archive
    that was written (always ending in .zip).

    dest_file_base_name may be given with or without a trailing .zip suffix.
    """
    # shutil.make_archive always appends ".zip" on its own, so strip an
    # existing suffix first to avoid producing "name.zip.zip".
    dest_file_base_name = (
        dest_file_base_name.with_suffix("")
        if dest_file_base_name.suffix == ".zip"
        else dest_file_base_name
    )

    ensure_dir_exists(dest_file_base_name.parent)

    print(f"Zipping {folder_to_zip}\n     to {dest_file_base_name}")

    # The base name is handed over as a plain string rather than a Path.
    archive_path = shutil.make_archive(str(dest_file_base_name), "zip", folder_to_zip)
    return Path(archive_path)
23

24

25
def unzip_folder(zip_file_path: Path, unzip_to_folder: Path) -> None:
    """
    Extracts the zip archive at zip_file_path into unzip_to_folder.

    Nothing is returned; the extracted files are found under unzip_to_folder,
    which the caller already knows.
    """
    print(f"Unzipping {zip_file_path}")
    print(f"       to {unzip_to_folder}")
    # The format is given explicitly so it is not guessed from the file name.
    shutil.unpack_archive(zip_file_path, unzip_to_folder, "zip")
32

33

34
def ensure_dir_exists(dir: Path) -> None:
    """
    Creates dir together with any missing parent directories.

    Calling this on a directory that already exists is not an error.
    """
    dir.mkdir(exist_ok=True, parents=True)
36

37

38
def copy_file(source_file: Path, dest_file: Path) -> None:
    """
    Copies the contents of source_file to dest_file.

    The destination's parent directory is created first if it is missing.
    """
    dest_parent = dest_file.parent
    ensure_dir_exists(dest_parent)

    shutil.copyfile(source_file, dest_file)
41

42

43
def load_json_file(file_path: Path) -> Any:
    """
    Returns the deserialized json object read from file_path.

    The file is always decoded as UTF-8 instead of the platform's locale
    encoding, so non-ASCII content loads the same way on every platform.
    """
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
49

50

51
def write_json_file(file_path: Path, content: Any) -> None:
    """
    Serializes content as JSON (2-space indent) and writes it to file_path.

    The parent directory of file_path is created first if it is missing.
    """
    ensure_dir_exists(file_path.parent)

    # Explicit UTF-8 keeps the result independent of the platform's locale
    # encoding. json.dump escapes non-ASCII characters by default, so the
    # bytes written are the same as before on ASCII-compatible locales.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=2)
57

58

59
def sanitize_for_s3(text: str) -> str:
    """
    Returns text restricted to a conservative character set for use in S3 keys.

    Every character that is not an ASCII letter, a digit, an underscore, or a
    dash is replaced with an underscore; the length of the text is unchanged.
    """
    disallowed_chars = re.compile(r"[^a-zA-Z0-9_-]")
    return disallowed_chars.sub("_", text)
65

66

67
def upload_file_to_s3(file_name: Path, bucket: str, key: str) -> None:
    """
    Uploads the local file file_name to s3://bucket/key.
    """
    print(f"Uploading {file_name}")
    print(f"       to s3://{bucket}/{key}")

    # The local path is passed to boto3 as a plain string.
    local_path = str(file_name)
    s3_client = boto3.client("s3")
    s3_client.upload_file(local_path, bucket, key)
76

77

78
def download_s3_objects_with_prefix(
    bucket_name: str, prefix: str, download_folder: Path
) -> List[Path]:
    """
    Downloads every object in bucket_name whose key starts with prefix.

    Each object is saved under download_folder at a relative path equal to
    its key, creating intermediate directories as needed.

    Returns the local paths of the downloaded files, in the order the objects
    were listed. The list is empty (and a message is printed) when nothing
    was downloaded.
    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(bucket_name)

    downloads: List[Path] = []

    for obj in bucket.objects.filter(Prefix=prefix):
        # Keys ending in "/" are "folder" placeholder objects. Downloading one
        # would create a regular file at the directory's path, and the mkdir
        # for the next object underneath it would then fail.
        if obj.key.endswith("/"):
            continue

        download_path = download_folder / obj.key

        ensure_dir_exists(download_path.parent)
        print(f"Downloading s3://{bucket.name}/{obj.key}")
        print(f"         to {download_path}")

        s3.Object(bucket.name, obj.key).download_file(str(download_path))
        downloads.append(download_path)

    if not downloads:
        print(
            f"There were no files matching the prefix `{prefix}` in bucket `{bucket.name}`"
        )

    return downloads
102

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.