TheAlgorithms-Python

Форк
0
/
damerau_levenshtein_distance.py 
71 строка · 2.2 Кб
1
"""
2
This script is a implementation of the Damerau-Levenshtein distance algorithm.
3

4
It's an algorithm that measures the edit distance between two string sequences
5

6
More information about this algorithm can be found in this wikipedia article:
7
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
8
"""
9

10

11
def damerau_levenshtein_distance(first_string: str, second_string: str) -> int:
12
    """
13
    Implements the Damerau-Levenshtein distance algorithm that measures
14
    the edit distance between two strings.
15

16
    Parameters:
17
        first_string: The first string to compare
18
        second_string: The second string to compare
19

20
    Returns:
21
        distance: The edit distance between the first and second strings
22

23
    >>> damerau_levenshtein_distance("cat", "cut")
24
    1
25
    >>> damerau_levenshtein_distance("kitten", "sitting")
26
    3
27
    >>> damerau_levenshtein_distance("hello", "world")
28
    4
29
    >>> damerau_levenshtein_distance("book", "back")
30
    2
31
    >>> damerau_levenshtein_distance("container", "containment")
32
    3
33
    >>> damerau_levenshtein_distance("container", "containment")
34
    3
35
    """
36
    # Create a dynamic programming matrix to store the distances
37
    dp_matrix = [[0] * (len(second_string) + 1) for _ in range(len(first_string) + 1)]
38

39
    # Initialize the matrix
40
    for i in range(len(first_string) + 1):
41
        dp_matrix[i][0] = i
42
    for j in range(len(second_string) + 1):
43
        dp_matrix[0][j] = j
44

45
    # Fill the matrix
46
    for i, first_char in enumerate(first_string, start=1):
47
        for j, second_char in enumerate(second_string, start=1):
48
            cost = int(first_char != second_char)
49

50
            dp_matrix[i][j] = min(
51
                dp_matrix[i - 1][j] + 1,  # Deletion
52
                dp_matrix[i][j - 1] + 1,  # Insertion
53
                dp_matrix[i - 1][j - 1] + cost,  # Substitution
54
            )
55

56
            if (
57
                i > 1
58
                and j > 1
59
                and first_string[i - 1] == second_string[j - 2]
60
                and first_string[i - 2] == second_string[j - 1]
61
            ):
62
                # Transposition
63
                dp_matrix[i][j] = min(dp_matrix[i][j], dp_matrix[i - 2][j - 2] + cost)
64

65
    return dp_matrix[-1][-1]
66

67

68
if __name__ == "__main__":
69
    import doctest
70

71
    doctest.testmod()
72

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.