TheAlgorithms-Python
444 строки · 11.1 Кб
1"""
2The MD5 algorithm is a hash function that's commonly used as a checksum to
3detect data corruption. The algorithm works by processing a given message in
4blocks of 512 bits, padding the message as needed. It uses the blocks to operate
5a 128-bit state and performs a total of 64 such operations. Note that all values
6are little-endian, so inputs are converted as needed.
7
8Although MD5 was used as a cryptographic hash function in the past, it's since
9been cracked, so it shouldn't be used for security purposes.
10
11For more info, see https://en.wikipedia.org/wiki/MD5
12"""
13
14from collections.abc import Generator15from math import sin16
17
18def to_little_endian(string_32: bytes) -> bytes:19"""20Converts the given string to little-endian in groups of 8 chars.
21
22Arguments:
23string_32 {[string]} -- [32-char string]
24
25Raises:
26ValueError -- [input is not 32 char]
27
28Returns:
2932-char little-endian string
30>>> to_little_endian(b'1234567890abcdfghijklmnopqrstuvw')
31b'pqrstuvwhijklmno90abcdfg12345678'
32>>> to_little_endian(b'1234567890')
33Traceback (most recent call last):
34...
35ValueError: Input must be of length 32
36"""
37if len(string_32) != 32:38raise ValueError("Input must be of length 32")39
40little_endian = b""41for i in [3, 2, 1, 0]:42little_endian += string_32[8 * i : 8 * i + 8]43return little_endian44
45
46def reformat_hex(i: int) -> bytes:47"""48Converts the given non-negative integer to hex string.
49
50Example: Suppose the input is the following:
51i = 1234
52
53The input is 0x000004d2 in hex, so the little-endian hex string is
54"d2040000".
55
56Arguments:
57i {[int]} -- [integer]
58
59Raises:
60ValueError -- [input is negative]
61
62Returns:
638-char little-endian hex string
64
65>>> reformat_hex(1234)
66b'd2040000'
67>>> reformat_hex(666)
68b'9a020000'
69>>> reformat_hex(0)
70b'00000000'
71>>> reformat_hex(1234567890)
72b'd2029649'
73>>> reformat_hex(1234567890987654321)
74b'b11c6cb1'
75>>> reformat_hex(-1)
76Traceback (most recent call last):
77...
78ValueError: Input must be non-negative
79"""
80if i < 0:81raise ValueError("Input must be non-negative")82
83hex_rep = format(i, "08x")[-8:]84little_endian_hex = b""85for i in [3, 2, 1, 0]:86little_endian_hex += hex_rep[2 * i : 2 * i + 2].encode("utf-8")87return little_endian_hex88
89
90def preprocess(message: bytes) -> bytes:91"""92Preprocesses the message string:
93- Convert message to bit string
94- Pad bit string to a multiple of 512 chars:
95- Append a 1
96- Append 0's until length = 448 (mod 512)
97- Append length of original message (64 chars)
98
99Example: Suppose the input is the following:
100message = "a"
101
102The message bit string is "01100001", which is 8 bits long. Thus, the
103bit string needs 439 bits of padding so that
104(bit_string + "1" + padding) = 448 (mod 512).
105The message length is "000010000...0" in 64-bit little-endian binary.
106The combined bit string is then 512 bits long.
107
108Arguments:
109message {[string]} -- [message string]
110
111Returns:
112processed bit string padded to a multiple of 512 chars
113
114>>> preprocess(b"a") == (b"01100001" + b"1" +
115... (b"0" * 439) + b"00001000" + (b"0" * 56))
116True
117>>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64)
118True
119"""
120bit_string = b""121for char in message:122bit_string += format(char, "08b").encode("utf-8")123start_len = format(len(bit_string), "064b").encode("utf-8")124
125# Pad bit_string to a multiple of 512 chars126bit_string += b"1"127while len(bit_string) % 512 != 448:128bit_string += b"0"129bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32])130
131return bit_string132
133
134def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]:135"""136Splits bit string into blocks of 512 chars and yields each block as a list
137of 32-bit words
138
139Example: Suppose the input is the following:
140bit_string =
141"000000000...0" + # 0x00 (32 bits, padded to the right)
142"000000010...0" + # 0x01 (32 bits, padded to the right)
143"000000100...0" + # 0x02 (32 bits, padded to the right)
144"000000110...0" + # 0x03 (32 bits, padded to the right)
145...
146"000011110...0" # 0x0a (32 bits, padded to the right)
147
148Then len(bit_string) == 512, so there'll be 1 block. The block is split
149into 32-bit words, and each word is converted to little endian. The
150first word is interpreted as 0 in decimal, the second word is
151interpreted as 1 in decimal, etc.
152
153Thus, block_words == [[0, 1, 2, 3, ..., 15]].
154
155Arguments:
156bit_string {[string]} -- [bit string with multiple of 512 as length]
157
158Raises:
159ValueError -- [length of bit string isn't multiple of 512]
160
161Yields:
162a list of 16 32-bit words
163
164>>> test_string = ("".join(format(n << 24, "032b") for n in range(16))
165... .encode("utf-8"))
166>>> list(get_block_words(test_string))
167[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
168>>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4
169True
170>>> list(get_block_words(b"1" * 512)) == [[4294967295] * 16]
171True
172>>> list(get_block_words(b""))
173[]
174>>> list(get_block_words(b"1111"))
175Traceback (most recent call last):
176...
177ValueError: Input must have length that's a multiple of 512
178"""
179if len(bit_string) % 512 != 0:180raise ValueError("Input must have length that's a multiple of 512")181
182for pos in range(0, len(bit_string), 512):183block = bit_string[pos : pos + 512]184block_words = []185for i in range(0, 512, 32):186block_words.append(int(to_little_endian(block[i : i + 32]), 2))187yield block_words188
189
190def not_32(i: int) -> int:191"""192Perform bitwise NOT on given int.
193
194Arguments:
195i {[int]} -- [given int]
196
197Raises:
198ValueError -- [input is negative]
199
200Returns:
201Result of bitwise NOT on i
202
203>>> not_32(34)
2044294967261
205>>> not_32(1234)
2064294966061
207>>> not_32(4294966061)
2081234
209>>> not_32(0)
2104294967295
211>>> not_32(1)
2124294967294
213>>> not_32(-1)
214Traceback (most recent call last):
215...
216ValueError: Input must be non-negative
217"""
218if i < 0:219raise ValueError("Input must be non-negative")220
221i_str = format(i, "032b")222new_str = ""223for c in i_str:224new_str += "1" if c == "0" else "0"225return int(new_str, 2)226
227
228def sum_32(a: int, b: int) -> int:229"""230Add two numbers as 32-bit ints.
231
232Arguments:
233a {[int]} -- [first given int]
234b {[int]} -- [second given int]
235
236Returns:
237(a + b) as an unsigned 32-bit int
238
239>>> sum_32(1, 1)
2402
241>>> sum_32(2, 3)
2425
243>>> sum_32(0, 0)
2440
245>>> sum_32(-1, -1)
2464294967294
247>>> sum_32(4294967295, 1)
2480
249"""
250return (a + b) % 2**32251
252
253def left_rotate_32(i: int, shift: int) -> int:254"""255Rotate the bits of a given int left by a given amount.
256
257Arguments:
258i {[int]} -- [given int]
259shift {[int]} -- [shift amount]
260
261Raises:
262ValueError -- [either given int or shift is negative]
263
264Returns:
265`i` rotated to the left by `shift` bits
266
267>>> left_rotate_32(1234, 1)
2682468
269>>> left_rotate_32(1111, 4)
27017776
271>>> left_rotate_32(2147483648, 1)
2721
273>>> left_rotate_32(2147483648, 3)
2744
275>>> left_rotate_32(4294967295, 4)
2764294967295
277>>> left_rotate_32(1234, 0)
2781234
279>>> left_rotate_32(0, 0)
2800
281>>> left_rotate_32(-1, 0)
282Traceback (most recent call last):
283...
284ValueError: Input must be non-negative
285>>> left_rotate_32(0, -1)
286Traceback (most recent call last):
287...
288ValueError: Shift must be non-negative
289"""
290if i < 0:291raise ValueError("Input must be non-negative")292if shift < 0:293raise ValueError("Shift must be non-negative")294return ((i << shift) ^ (i >> (32 - shift))) % 2**32295
296
297def md5_me(message: bytes) -> bytes:298"""299Returns the 32-char MD5 hash of a given message.
300
301Reference: https://en.wikipedia.org/wiki/MD5#Algorithm
302
303Arguments:
304message {[string]} -- [message]
305
306Returns:
30732-char MD5 hash string
308
309>>> md5_me(b"")
310b'd41d8cd98f00b204e9800998ecf8427e'
311>>> md5_me(b"The quick brown fox jumps over the lazy dog")
312b'9e107d9d372bb6826bd81d3542a419d6'
313>>> md5_me(b"The quick brown fox jumps over the lazy dog.")
314b'e4d909c290d0fb1ca068ffaddf22cbd0'
315
316>>> import hashlib
317>>> from string import ascii_letters
318>>> msgs = [b"", ascii_letters.encode("utf-8"), "Üñîçø∂é".encode("utf-8"),
319... b"The quick brown fox jumps over the lazy dog."]
320>>> all(md5_me(msg) == hashlib.md5(msg).hexdigest().encode("utf-8") for msg in msgs)
321True
322"""
323
324# Convert to bit string, add padding and append message length325bit_string = preprocess(message)326
327added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)]328
329# Starting states330a0 = 0x67452301331b0 = 0xEFCDAB89332c0 = 0x98BADCFE333d0 = 0x10325476334
335shift_amounts = [3367,33712,33817,33922,3407,34112,34217,34322,3447,34512,34617,34722,3487,34912,35017,35122,3525,3539,35414,35520,3565,3579,35814,35920,3605,3619,36214,36320,3645,3659,36614,36720,3684,36911,37016,37123,3724,37311,37416,37523,3764,37711,37816,37923,3804,38111,38216,38323,3846,38510,38615,38721,3886,38910,39015,39121,3926,39310,39415,39521,3966,39710,39815,39921,400]401
402# Process bit string in chunks, each with 16 32-char words403for block_words in get_block_words(bit_string):404a = a0405b = b0406c = c0407d = d0408
409# Hash current chunk410for i in range(64):411if i <= 15:412# f = (b & c) | (not_32(b) & d) # Alternate definition for f413f = d ^ (b & (c ^ d))414g = i415elif i <= 31:416# f = (d & b) | (not_32(d) & c) # Alternate definition for f417f = c ^ (d & (b ^ c))418g = (5 * i + 1) % 16419elif i <= 47:420f = b ^ c ^ d421g = (3 * i + 5) % 16422else:423f = c ^ (b | not_32(d))424g = (7 * i) % 16425f = (f + a + added_consts[i] + block_words[g]) % 2**32426a = d427d = c428c = b429b = sum_32(b, left_rotate_32(f, shift_amounts[i]))430
431# Add hashed chunk to running total432a0 = sum_32(a0, a)433b0 = sum_32(b0, b)434c0 = sum_32(c0, c)435d0 = sum_32(d0, d)436
437digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0)438return digest439
440
441if __name__ == "__main__":442import doctest443
444doctest.testmod()445