TheAlgorithms-Python

Форк
0
444 строки · 11.1 Кб
1
"""
2
The MD5 algorithm is a hash function that's commonly used as a checksum to
3
detect data corruption. The algorithm works by processing a given message in
4
blocks of 512 bits, padding the message as needed. It uses the blocks to operate
5
a 128-bit state and performs a total of 64 such operations. Note that all values
6
are little-endian, so inputs are converted as needed.
7

8
Although MD5 was used as a cryptographic hash function in the past, it's since
9
been cracked, so it shouldn't be used for security purposes.
10

11
For more info, see https://en.wikipedia.org/wiki/MD5
12
"""
13

14
from collections.abc import Generator
15
from math import sin
16

17

18
def to_little_endian(string_32: bytes) -> bytes:
19
    """
20
    Converts the given string to little-endian in groups of 8 chars.
21

22
    Arguments:
23
        string_32 {[string]} -- [32-char string]
24

25
    Raises:
26
        ValueError -- [input is not 32 char]
27

28
    Returns:
29
        32-char little-endian string
30
    >>> to_little_endian(b'1234567890abcdfghijklmnopqrstuvw')
31
    b'pqrstuvwhijklmno90abcdfg12345678'
32
    >>> to_little_endian(b'1234567890')
33
    Traceback (most recent call last):
34
    ...
35
    ValueError: Input must be of length 32
36
    """
37
    if len(string_32) != 32:
38
        raise ValueError("Input must be of length 32")
39

40
    little_endian = b""
41
    for i in [3, 2, 1, 0]:
42
        little_endian += string_32[8 * i : 8 * i + 8]
43
    return little_endian
44

45

46
def reformat_hex(i: int) -> bytes:
47
    """
48
    Converts the given non-negative integer to hex string.
49

50
    Example: Suppose the input is the following:
51
        i = 1234
52

53
        The input is 0x000004d2 in hex, so the little-endian hex string is
54
        "d2040000".
55

56
    Arguments:
57
        i {[int]} -- [integer]
58

59
    Raises:
60
        ValueError -- [input is negative]
61

62
    Returns:
63
        8-char little-endian hex string
64

65
    >>> reformat_hex(1234)
66
    b'd2040000'
67
    >>> reformat_hex(666)
68
    b'9a020000'
69
    >>> reformat_hex(0)
70
    b'00000000'
71
    >>> reformat_hex(1234567890)
72
    b'd2029649'
73
    >>> reformat_hex(1234567890987654321)
74
    b'b11c6cb1'
75
    >>> reformat_hex(-1)
76
    Traceback (most recent call last):
77
    ...
78
    ValueError: Input must be non-negative
79
    """
80
    if i < 0:
81
        raise ValueError("Input must be non-negative")
82

83
    hex_rep = format(i, "08x")[-8:]
84
    little_endian_hex = b""
85
    for i in [3, 2, 1, 0]:
86
        little_endian_hex += hex_rep[2 * i : 2 * i + 2].encode("utf-8")
87
    return little_endian_hex
88

89

90
def preprocess(message: bytes) -> bytes:
91
    """
92
    Preprocesses the message string:
93
    - Convert message to bit string
94
    - Pad bit string to a multiple of 512 chars:
95
        - Append a 1
96
        - Append 0's until length = 448 (mod 512)
97
        - Append length of original message (64 chars)
98

99
    Example: Suppose the input is the following:
100
        message = "a"
101

102
        The message bit string is "01100001", which is 8 bits long. Thus, the
103
        bit string needs 439 bits of padding so that
104
        (bit_string + "1" + padding) = 448 (mod 512).
105
        The message length is "000010000...0" in 64-bit little-endian binary.
106
        The combined bit string is then 512 bits long.
107

108
    Arguments:
109
        message {[string]} -- [message string]
110

111
    Returns:
112
        processed bit string padded to a multiple of 512 chars
113

114
    >>> preprocess(b"a") == (b"01100001" + b"1" +
115
    ...                     (b"0" * 439) + b"00001000" + (b"0" * 56))
116
    True
117
    >>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64)
118
    True
119
    """
120
    bit_string = b""
121
    for char in message:
122
        bit_string += format(char, "08b").encode("utf-8")
123
    start_len = format(len(bit_string), "064b").encode("utf-8")
124

125
    # Pad bit_string to a multiple of 512 chars
126
    bit_string += b"1"
127
    while len(bit_string) % 512 != 448:
128
        bit_string += b"0"
129
    bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32])
130

131
    return bit_string
132

133

134
def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]:
135
    """
136
    Splits bit string into blocks of 512 chars and yields each block as a list
137
    of 32-bit words
138

139
    Example: Suppose the input is the following:
140
        bit_string =
141
            "000000000...0" +  # 0x00 (32 bits, padded to the right)
142
            "000000010...0" +  # 0x01 (32 bits, padded to the right)
143
            "000000100...0" +  # 0x02 (32 bits, padded to the right)
144
            "000000110...0" +  # 0x03 (32 bits, padded to the right)
145
            ...
146
            "000011110...0"    # 0x0a (32 bits, padded to the right)
147

148
        Then len(bit_string) == 512, so there'll be 1 block. The block is split
149
        into 32-bit words, and each word is converted to little endian. The
150
        first word is interpreted as 0 in decimal, the second word is
151
        interpreted as 1 in decimal, etc.
152

153
        Thus, block_words == [[0, 1, 2, 3, ..., 15]].
154

155
    Arguments:
156
        bit_string {[string]} -- [bit string with multiple of 512 as length]
157

158
    Raises:
159
        ValueError -- [length of bit string isn't multiple of 512]
160

161
    Yields:
162
        a list of 16 32-bit words
163

164
    >>> test_string = ("".join(format(n << 24, "032b") for n in range(16))
165
    ...                  .encode("utf-8"))
166
    >>> list(get_block_words(test_string))
167
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
168
    >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4
169
    True
170
    >>> list(get_block_words(b"1" * 512)) == [[4294967295] * 16]
171
    True
172
    >>> list(get_block_words(b""))
173
    []
174
    >>> list(get_block_words(b"1111"))
175
    Traceback (most recent call last):
176
    ...
177
    ValueError: Input must have length that's a multiple of 512
178
    """
179
    if len(bit_string) % 512 != 0:
180
        raise ValueError("Input must have length that's a multiple of 512")
181

182
    for pos in range(0, len(bit_string), 512):
183
        block = bit_string[pos : pos + 512]
184
        block_words = []
185
        for i in range(0, 512, 32):
186
            block_words.append(int(to_little_endian(block[i : i + 32]), 2))
187
        yield block_words
188

189

190
def not_32(i: int) -> int:
191
    """
192
    Perform bitwise NOT on given int.
193

194
    Arguments:
195
        i {[int]} -- [given int]
196

197
    Raises:
198
        ValueError -- [input is negative]
199

200
    Returns:
201
        Result of bitwise NOT on i
202

203
    >>> not_32(34)
204
    4294967261
205
    >>> not_32(1234)
206
    4294966061
207
    >>> not_32(4294966061)
208
    1234
209
    >>> not_32(0)
210
    4294967295
211
    >>> not_32(1)
212
    4294967294
213
    >>> not_32(-1)
214
    Traceback (most recent call last):
215
    ...
216
    ValueError: Input must be non-negative
217
    """
218
    if i < 0:
219
        raise ValueError("Input must be non-negative")
220

221
    i_str = format(i, "032b")
222
    new_str = ""
223
    for c in i_str:
224
        new_str += "1" if c == "0" else "0"
225
    return int(new_str, 2)
226

227

228
def sum_32(a: int, b: int) -> int:
229
    """
230
    Add two numbers as 32-bit ints.
231

232
    Arguments:
233
        a {[int]} -- [first given int]
234
        b {[int]} -- [second given int]
235

236
    Returns:
237
        (a + b) as an unsigned 32-bit int
238

239
    >>> sum_32(1, 1)
240
    2
241
    >>> sum_32(2, 3)
242
    5
243
    >>> sum_32(0, 0)
244
    0
245
    >>> sum_32(-1, -1)
246
    4294967294
247
    >>> sum_32(4294967295, 1)
248
    0
249
    """
250
    return (a + b) % 2**32
251

252

253
def left_rotate_32(i: int, shift: int) -> int:
254
    """
255
    Rotate the bits of a given int left by a given amount.
256

257
    Arguments:
258
        i {[int]} -- [given int]
259
        shift {[int]} -- [shift amount]
260

261
    Raises:
262
        ValueError -- [either given int or shift is negative]
263

264
    Returns:
265
        `i` rotated to the left by `shift` bits
266

267
    >>> left_rotate_32(1234, 1)
268
    2468
269
    >>> left_rotate_32(1111, 4)
270
    17776
271
    >>> left_rotate_32(2147483648, 1)
272
    1
273
    >>> left_rotate_32(2147483648, 3)
274
    4
275
    >>> left_rotate_32(4294967295, 4)
276
    4294967295
277
    >>> left_rotate_32(1234, 0)
278
    1234
279
    >>> left_rotate_32(0, 0)
280
    0
281
    >>> left_rotate_32(-1, 0)
282
    Traceback (most recent call last):
283
    ...
284
    ValueError: Input must be non-negative
285
    >>> left_rotate_32(0, -1)
286
    Traceback (most recent call last):
287
    ...
288
    ValueError: Shift must be non-negative
289
    """
290
    if i < 0:
291
        raise ValueError("Input must be non-negative")
292
    if shift < 0:
293
        raise ValueError("Shift must be non-negative")
294
    return ((i << shift) ^ (i >> (32 - shift))) % 2**32
295

296

297
def md5_me(message: bytes) -> bytes:
298
    """
299
    Returns the 32-char MD5 hash of a given message.
300

301
    Reference: https://en.wikipedia.org/wiki/MD5#Algorithm
302

303
    Arguments:
304
        message {[string]} -- [message]
305

306
    Returns:
307
        32-char MD5 hash string
308

309
    >>> md5_me(b"")
310
    b'd41d8cd98f00b204e9800998ecf8427e'
311
    >>> md5_me(b"The quick brown fox jumps over the lazy dog")
312
    b'9e107d9d372bb6826bd81d3542a419d6'
313
    >>> md5_me(b"The quick brown fox jumps over the lazy dog.")
314
    b'e4d909c290d0fb1ca068ffaddf22cbd0'
315

316
    >>> import hashlib
317
    >>> from string import ascii_letters
318
    >>> msgs = [b"", ascii_letters.encode("utf-8"), "Üñîçø∂é".encode("utf-8"),
319
    ...         b"The quick brown fox jumps over the lazy dog."]
320
    >>> all(md5_me(msg) == hashlib.md5(msg).hexdigest().encode("utf-8") for msg in msgs)
321
    True
322
    """
323

324
    # Convert to bit string, add padding and append message length
325
    bit_string = preprocess(message)
326

327
    added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)]
328

329
    # Starting states
330
    a0 = 0x67452301
331
    b0 = 0xEFCDAB89
332
    c0 = 0x98BADCFE
333
    d0 = 0x10325476
334

335
    shift_amounts = [
336
        7,
337
        12,
338
        17,
339
        22,
340
        7,
341
        12,
342
        17,
343
        22,
344
        7,
345
        12,
346
        17,
347
        22,
348
        7,
349
        12,
350
        17,
351
        22,
352
        5,
353
        9,
354
        14,
355
        20,
356
        5,
357
        9,
358
        14,
359
        20,
360
        5,
361
        9,
362
        14,
363
        20,
364
        5,
365
        9,
366
        14,
367
        20,
368
        4,
369
        11,
370
        16,
371
        23,
372
        4,
373
        11,
374
        16,
375
        23,
376
        4,
377
        11,
378
        16,
379
        23,
380
        4,
381
        11,
382
        16,
383
        23,
384
        6,
385
        10,
386
        15,
387
        21,
388
        6,
389
        10,
390
        15,
391
        21,
392
        6,
393
        10,
394
        15,
395
        21,
396
        6,
397
        10,
398
        15,
399
        21,
400
    ]
401

402
    # Process bit string in chunks, each with 16 32-char words
403
    for block_words in get_block_words(bit_string):
404
        a = a0
405
        b = b0
406
        c = c0
407
        d = d0
408

409
        # Hash current chunk
410
        for i in range(64):
411
            if i <= 15:
412
                # f = (b & c) | (not_32(b) & d)     # Alternate definition for f
413
                f = d ^ (b & (c ^ d))
414
                g = i
415
            elif i <= 31:
416
                # f = (d & b) | (not_32(d) & c)     # Alternate definition for f
417
                f = c ^ (d & (b ^ c))
418
                g = (5 * i + 1) % 16
419
            elif i <= 47:
420
                f = b ^ c ^ d
421
                g = (3 * i + 5) % 16
422
            else:
423
                f = c ^ (b | not_32(d))
424
                g = (7 * i) % 16
425
            f = (f + a + added_consts[i] + block_words[g]) % 2**32
426
            a = d
427
            d = c
428
            c = b
429
            b = sum_32(b, left_rotate_32(f, shift_amounts[i]))
430

431
        # Add hashed chunk to running total
432
        a0 = sum_32(a0, a)
433
        b0 = sum_32(b0, b)
434
        c0 = sum_32(c0, c)
435
        d0 = sum_32(d0, d)
436

437
    digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0)
438
    return digest
439

440

441
if __name__ == "__main__":
442
    import doctest
443

444
    doctest.testmod()
445

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.