TheAlgorithms-Python
79 строк · 2.3 Кб
1"""
2Bitap exact string matching
3https://en.wikipedia.org/wiki/Bitap_algorithm
4
5Searches for a pattern inside text, and returns the index of the first occurrence
6of the pattern. Both text and pattern consist of lowercase alphabetical characters only.
7
8Complexity: O(m*n)
9n = length of text
10m = length of pattern
11
12Python doctests can be run using this command:
13python3 -m doctest -v bitap_string_match.py
14"""
15
16
17def bitap_string_match(text: str, pattern: str) -> int:
18"""
19Retrieves the index of the first occurrence of pattern in text.
20
21Args:
22text: A string consisting only of lowercase alphabetical characters.
23pattern: A string consisting only of lowercase alphabetical characters.
24
25Returns:
26int: The index where pattern first occurs. Return -1 if not found.
27
28>>> bitap_string_match('abdabababc', 'ababc')
295
30>>> bitap_string_match('aaaaaaaaaaaaaaaaaa', 'a')
310
32>>> bitap_string_match('zxywsijdfosdfnso', 'zxywsijdfosdfnso')
330
34>>> bitap_string_match('abdabababc', '')
350
36>>> bitap_string_match('abdabababc', 'c')
379
38>>> bitap_string_match('abdabababc', 'fofosdfo')
39-1
40>>> bitap_string_match('abdab', 'fofosdfo')
41-1
42"""
43if not pattern:
44return 0
45m = len(pattern)
46if m > len(text):
47return -1
48
49# Initial state of bit string 1110
50state = ~1
51# Bit = 0 if character appears at index, and 1 otherwise
52pattern_mask: list[int] = [~0] * 27 # 1111
53
54for i, char in enumerate(pattern):
55# For the pattern mask for this character, set the bit to 0 for each i
56# the character appears.
57pattern_index: int = ord(char) - ord("a")
58pattern_mask[pattern_index] &= ~(1 << i)
59
60for i, char in enumerate(text):
61text_index = ord(char) - ord("a")
62# If this character does not appear in pattern, it's pattern mask is 1111.
63# Performing a bitwise OR between state and 1111 will reset the state to 1111
64# and start searching the start of pattern again.
65state |= pattern_mask[text_index]
66state <<= 1
67
68# If the mth bit (counting right to left) of the state is 0, then we have
69# found pattern in text
70if (state & (1 << m)) == 0:
71return i - m + 1
72
73return -1
74
75
76if __name__ == "__main__":
77import doctest
78
79doctest.testmod()
80