TheAlgorithms-Python

Форк
0
/
bitap_string_match.py 
79 строк · 2.3 Кб
1
"""
2
Bitap exact string matching
3
https://en.wikipedia.org/wiki/Bitap_algorithm
4

5
Searches for a pattern inside text, and returns the index of the first occurrence
6
of the pattern. Both text and pattern consist of lowercase alphabetical characters only.
7

8
Complexity: O(m*n)
9
    n = length of text
10
    m = length of pattern
11

12
Python doctests can be run using this command:
13
python3 -m doctest -v bitap_string_match.py
14
"""
15

16

17
def bitap_string_match(text: str, pattern: str) -> int:
18
    """
19
    Retrieves the index of the first occurrence of pattern in text.
20

21
    Args:
22
        text: A string consisting only of lowercase alphabetical characters.
23
        pattern: A string consisting only of lowercase alphabetical characters.
24

25
    Returns:
26
        int: The index where pattern first occurs. Return -1  if not found.
27

28
    >>> bitap_string_match('abdabababc', 'ababc')
29
    5
30
    >>> bitap_string_match('aaaaaaaaaaaaaaaaaa', 'a')
31
    0
32
    >>> bitap_string_match('zxywsijdfosdfnso', 'zxywsijdfosdfnso')
33
    0
34
    >>> bitap_string_match('abdabababc', '')
35
    0
36
    >>> bitap_string_match('abdabababc', 'c')
37
    9
38
    >>> bitap_string_match('abdabababc', 'fofosdfo')
39
    -1
40
    >>> bitap_string_match('abdab', 'fofosdfo')
41
    -1
42
    """
43
    if not pattern:
44
        return 0
45
    m = len(pattern)
46
    if m > len(text):
47
        return -1
48

49
    # Initial state of bit string 1110
50
    state = ~1
51
    # Bit = 0 if character appears at index, and 1 otherwise
52
    pattern_mask: list[int] = [~0] * 27  # 1111
53

54
    for i, char in enumerate(pattern):
55
        # For the pattern mask for this character, set the bit to 0 for each i
56
        # the character appears.
57
        pattern_index: int = ord(char) - ord("a")
58
        pattern_mask[pattern_index] &= ~(1 << i)
59

60
    for i, char in enumerate(text):
61
        text_index = ord(char) - ord("a")
62
        # If this character does not appear in pattern, it's pattern mask is 1111.
63
        # Performing a bitwise OR between state and 1111 will reset the state to 1111
64
        # and start searching the start of pattern again.
65
        state |= pattern_mask[text_index]
66
        state <<= 1
67

68
        # If the mth bit (counting right to left) of the state is 0, then we have
69
        # found pattern in text
70
        if (state & (1 << m)) == 0:
71
            return i - m + 1
72

73
    return -1
74

75

76
if __name__ == "__main__":
77
    import doctest
78

79
    doctest.testmod()
80

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.