pytorch-lightning
51 строка · 1.7 Кб
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
15import hashlib16from typing import List17
18
def _get_hash(files: List[str], algorithm: str = "blake2", chunk_num_blocks: int = 128) -> str:
    """Hashes the contents of a list of files.

    Every file is read in binary mode and fed, in the order given, into one
    hash object, so the result is a single digest over the concatenated
    contents of all files. An empty list yields the digest of empty input.

    Parameters
    ----------
    files: List[str]
        List of file paths. Each entry is passed to ``open``.
    algorithm: str, default "blake2"
        Algorithm to hash contents. "blake2" is set by default because it
        is faster than "md5". [1]
    chunk_num_blocks: int, default 128
        Number of hash blocks to read per chunk when iterating over a file;
        the chunk size in bytes is ``chunk_num_blocks * h.block_size``. [2]
        Must be at least 1.

    Returns
    -------
    str
        Hexadecimal digest of the files' contents.

    Raises
    ------
    ValueError
        If ``algorithm`` is neither "blake2" nor "md5", or if
        ``chunk_num_blocks`` is smaller than 1.

    References
    ----------
    [1] https://crypto.stackexchange.com/questions/70101/blake2-vs-md5-for-checksum-file-integrity
    [2] https://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python

    """
    # validate input
    if algorithm == "blake2":
        h = hashlib.blake2b(digest_size=20)
    elif algorithm == "md5":
        h = hashlib.md5()
    else:
        raise ValueError(f"Algorithm {algorithm} not supported")
    if chunk_num_blocks < 1:
        # A chunk size of 0 makes the very first read return b"", which is the
        # loop sentinel: every file would be skipped and the digest of empty
        # input returned. A negative size would read whole files in one call.
        raise ValueError(f"chunk_num_blocks must be a positive integer, got {chunk_num_blocks}")

    # The chunk size does not change between reads, so compute it once.
    chunk_size = chunk_num_blocks * h.block_size

    # calculate hash for all files
    for file in files:
        with open(file, "rb") as f:
            # read() returns b"" at end of file, which stops the iterator
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
    return h.hexdigest()