google-research
96 строк · 3.1 Кб
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Computes the Longest Common Subsequence (LCS).
17
18Description of the dynamic programming algorithm:
19https://www.algorithmist.com/index.php/Longest_Common_Subsequence
20"""
21
22import contextlib
23import sys
24
25
26@contextlib.contextmanager
27def _recursion_limit(new_limit):
28original_limit = sys.getrecursionlimit()
29sys.setrecursionlimit(new_limit)
30try:
31yield
32finally:
33sys.setrecursionlimit(original_limit)
34
35
36def compute_lcs(sequence_1, sequence_2, max_recursion_depth=10000):
37"""Computes the Longest Common Subsequence (LCS).
38
39Description of the dynamic programming algorithm:
40https://www.algorithmist.com/index.php/Longest_Common_Subsequence
41
42Args:
43sequence_1: First of the two sequences to be aligned.
44sequence_2: Second of the two sequences to be aligned.
45max_recursion_depth: Maximum recursion depth for the LCS backtracking.
46
47Returns:
48Sequence of items in the LCS.
49
50Raises:
51RecursionError: If computing LCS requires too many recursive calls.
52This can be avoided by setting a higher max_recursion_depth.
53"""
54table = _lcs_table(sequence_1, sequence_2)
55with _recursion_limit(max_recursion_depth):
56return _backtrack(table, sequence_1, sequence_2, len(sequence_1),
57len(sequence_2))
58
59
60def _lcs_table(sequence_1, sequence_2):
61"""Returns the Longest Common Subsequence dynamic programming table."""
62rows = len(sequence_1)
63cols = len(sequence_2)
64lcs_table = [[0] * (cols + 1) for _ in range(rows + 1)]
65for i in range(1, rows + 1):
66for j in range(1, cols + 1):
67if sequence_1[i - 1] == sequence_2[j - 1]:
68lcs_table[i][j] = lcs_table[i - 1][j - 1] + 1
69else:
70lcs_table[i][j] = max(lcs_table[i - 1][j], lcs_table[i][j - 1])
71return lcs_table
72
73
74def _backtrack(table, sequence_1, sequence_2, i, j):
75"""Backtracks the Longest Common Subsequence table to reconstruct the LCS.
76
77Args:
78table: Precomputed LCS table.
79sequence_1: First of the two sequences to be aligned.
80sequence_2: Second of the two sequences to be aligned.
81i: Current row index.
82j: Current column index.
83
84Returns:
85List of tokens corresponding to LCS.
86"""
87if i == 0 or j == 0:
88return []
89if sequence_1[i - 1] == sequence_2[j - 1]:
90# Append the aligned token to output.
91return _backtrack(table, sequence_1, sequence_2, i - 1,
92j - 1) + [sequence_2[j - 1]]
93if table[i][j - 1] > table[i - 1][j]:
94return _backtrack(table, sequence_1, sequence_2, i, j - 1)
95else:
96return _backtrack(table, sequence_1, sequence_2, i - 1, j)
97