google-research

Форк
0
96 строк · 3.1 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Computes the Longest Common Subsequence (LCS).
17

18
  Description of the dynamic programming algorithm:
19
  https://www.algorithmist.com/index.php/Longest_Common_Subsequence
20
"""
21

22
import contextlib
23
import sys
24

25

26
@contextlib.contextmanager
27
def _recursion_limit(new_limit):
28
  original_limit = sys.getrecursionlimit()
29
  sys.setrecursionlimit(new_limit)
30
  try:
31
    yield
32
  finally:
33
    sys.setrecursionlimit(original_limit)
34

35

36
def compute_lcs(sequence_1, sequence_2, max_recursion_depth=10000):
37
  """Computes the Longest Common Subsequence (LCS).
38

39
  Description of the dynamic programming algorithm:
40
  https://www.algorithmist.com/index.php/Longest_Common_Subsequence
41

42
  Args:
43
    sequence_1: First of the two sequences to be aligned.
44
    sequence_2: Second of the two sequences to be aligned.
45
    max_recursion_depth: Maximum recursion depth for the LCS backtracking.
46

47
  Returns:
48
    Sequence of items in the LCS.
49

50
  Raises:
51
    RecursionError: If computing LCS requires too many recursive calls.
52
      This can be avoided by setting a higher max_recursion_depth.
53
  """
54
  table = _lcs_table(sequence_1, sequence_2)
55
  with _recursion_limit(max_recursion_depth):
56
    return _backtrack(table, sequence_1, sequence_2, len(sequence_1),
57
                      len(sequence_2))
58

59

60
def _lcs_table(sequence_1, sequence_2):
61
  """Returns the Longest Common Subsequence dynamic programming table."""
62
  rows = len(sequence_1)
63
  cols = len(sequence_2)
64
  lcs_table = [[0] * (cols + 1) for _ in range(rows + 1)]
65
  for i in range(1, rows + 1):
66
    for j in range(1, cols + 1):
67
      if sequence_1[i - 1] == sequence_2[j - 1]:
68
        lcs_table[i][j] = lcs_table[i - 1][j - 1] + 1
69
      else:
70
        lcs_table[i][j] = max(lcs_table[i - 1][j], lcs_table[i][j - 1])
71
  return lcs_table
72

73

74
def _backtrack(table, sequence_1, sequence_2, i, j):
75
  """Backtracks the Longest Common Subsequence table to reconstruct the LCS.
76

77
  Args:
78
    table: Precomputed LCS table.
79
    sequence_1: First of the two sequences to be aligned.
80
    sequence_2: Second of the two sequences to be aligned.
81
    i: Current row index.
82
    j: Current column index.
83

84
  Returns:
85
    List of tokens corresponding to LCS.
86
  """
87
  if i == 0 or j == 0:
88
    return []
89
  if sequence_1[i - 1] == sequence_2[j - 1]:
90
    # Append the aligned token to output.
91
    return _backtrack(table, sequence_1, sequence_2, i - 1,
92
                      j - 1) + [sequence_2[j - 1]]
93
  if table[i][j - 1] > table[i - 1][j]:
94
    return _backtrack(table, sequence_1, sequence_2, i, j - 1)
95
  else:
96
    return _backtrack(table, sequence_1, sequence_2, i - 1, j)
97

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.