TheAlgorithms-Python
125 строк · 3.5 Кб
1"""
2One of the several implementations of Lempel–Ziv–Welch compression algorithm
3https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
4"""
5
6import math7import os8import sys9
10
11def read_file_binary(file_path: str) -> str:12"""13Reads given file as bytes and returns them as a long string
14"""
15result = ""16try:17with open(file_path, "rb") as binary_file:18data = binary_file.read()19for dat in data:20curr_byte = f"{dat:08b}"21result += curr_byte22return result23except OSError:24print("File not accessible")25sys.exit()26
27
28def add_key_to_lexicon(29lexicon: dict[str, str], curr_string: str, index: int, last_match_id: str30) -> None:31"""32Adds new strings (curr_string + "0", curr_string + "1") to the lexicon
33"""
34lexicon.pop(curr_string)35lexicon[curr_string + "0"] = last_match_id36
37if math.log2(index).is_integer():38for curr_key in lexicon:39lexicon[curr_key] = "0" + lexicon[curr_key]40
41lexicon[curr_string + "1"] = bin(index)[2:]42
43
44def compress_data(data_bits: str) -> str:45"""46Compresses given data_bits using Lempel–Ziv–Welch compression algorithm
47and returns the result as a string
48"""
49lexicon = {"0": "0", "1": "1"}50result, curr_string = "", ""51index = len(lexicon)52
53for i in range(len(data_bits)):54curr_string += data_bits[i]55if curr_string not in lexicon:56continue57
58last_match_id = lexicon[curr_string]59result += last_match_id60add_key_to_lexicon(lexicon, curr_string, index, last_match_id)61index += 162curr_string = ""63
64while curr_string != "" and curr_string not in lexicon:65curr_string += "0"66
67if curr_string != "":68last_match_id = lexicon[curr_string]69result += last_match_id70
71return result72
73
74def add_file_length(source_path: str, compressed: str) -> str:75"""76Adds given file's length in front (using Elias gamma coding) of the compressed
77string
78"""
79file_length = os.path.getsize(source_path)80file_length_binary = bin(file_length)[2:]81length_length = len(file_length_binary)82
83return "0" * (length_length - 1) + file_length_binary + compressed84
85
86def write_file_binary(file_path: str, to_write: str) -> None:87"""88Writes given to_write string (should only consist of 0's and 1's) as bytes in the
89file
90"""
91byte_length = 892try:93with open(file_path, "wb") as opened_file:94result_byte_array = [95to_write[i : i + byte_length]96for i in range(0, len(to_write), byte_length)97]98
99if len(result_byte_array[-1]) % byte_length == 0:100result_byte_array.append("10000000")101else:102result_byte_array[-1] += "1" + "0" * (103byte_length - len(result_byte_array[-1]) - 1104)105
106for elem in result_byte_array:107opened_file.write(int(elem, 2).to_bytes(1, byteorder="big"))108except OSError:109print("File not accessible")110sys.exit()111
112
113def compress(source_path: str, destination_path: str) -> None:114"""115Reads source file, compresses it and writes the compressed result in destination
116file
117"""
118data_bits = read_file_binary(source_path)119compressed = compress_data(data_bits)120compressed = add_file_length(source_path, compressed)121write_file_binary(destination_path, compressed)122
123
124if __name__ == "__main__":125compress(sys.argv[1], sys.argv[2])126