llama-index

Форк
0
108 строк · 3.4 Кб
1
import logging
2
import re
3
from typing import TYPE_CHECKING, Any, List, Optional, Pattern
4

5
import numpy as np
6

7
# Module-level logger named after this module; used to report a missing
# or outdated Redis module before the corresponding error is raised.
_logger = logging.getLogger(__name__)
8

9
if TYPE_CHECKING:
10
    from redis.client import Redis as RedisType
11
    from redis.commands.search.query import Query
12

13

14
class TokenEscaper:
    """Backslash-escape punctuation in a string used in a RediSearch query.

    Adapted from RedisOM Python.
    """

    # Character class of everything RediSearch requires us to escape in queries.
    # Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization
    DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"

    def __init__(self, escape_chars_re: Optional[Pattern] = None):
        """Create an escaper.

        Args:
            escape_chars_re: Compiled pattern matching the text to escape.
                When omitted (or falsy), ``DEFAULT_ESCAPED_CHARS`` is compiled
                and used instead.
        """
        self.escaped_chars_re = escape_chars_re or re.compile(
            self.DEFAULT_ESCAPED_CHARS
        )

    def escape(self, value: str) -> str:
        """Return ``value`` with every pattern match prefixed by a backslash."""
        return self.escaped_chars_re.sub(
            lambda found: f"\\{found.group(0)}", value
        )
35

36

37
# Redis modules accepted by check_redis_modules_exist(): finding any one of
# them installed at version >= "ver" is sufficient. The version is compared
# as an integer against what the server reports.
REDIS_REQUIRED_MODULES = [
    {"name": module_name, "ver": 20400}
    for module_name in ("search", "searchlight")
]
42

43

44
def check_redis_modules_exist(client: "RedisType") -> None:
    """Verify that at least one required Redis module is installed.

    Args:
        client: Redis client; its ``module_list()`` result is inspected.
            Each entry is read through the bytes keys ``b"name"`` and
            ``b"ver"``.

    Raises:
        ValueError: If no entry of ``REDIS_REQUIRED_MODULES`` is installed
            at the required version or newer. The same message is logged
            at error level first.
    """
    # Index the server's modules by their decoded name.
    modules_by_name = {}
    for info in client.module_list():
        modules_by_name[info[b"name"].decode("utf-8")] = info

    for required in REDIS_REQUIRED_MODULES:
        name = required["name"]
        if name not in modules_by_name:
            continue
        installed_version = int(modules_by_name[name][b"ver"])
        required_version = int(required["ver"])  # type: ignore[call-overload]
        if installed_version >= required_version:
            # A single sufficiently recent module is enough.
            return

    # Nothing suitable was found: log, then raise.
    error_message = (
        "You must add the RediSearch (>= 2.4) module from Redis Stack. "
        "Please refer to Redis Stack docs: https://redis.io/docs/stack/"
    )
    _logger.error(error_message)
    raise ValueError(error_message)
64

65

66
def get_redis_query(
    return_fields: List[str],
    top_k: int = 20,
    vector_field: str = "vector",
    sort: bool = True,
    filters: str = "*",
) -> "Query":
    """Build a KNN vector query for use with a SearchIndex.

    The query text refers to the query vector as the ``$vector`` parameter
    and exposes the distance under the alias ``vector_score``.

    Args:
        return_fields (List[str]): Fields to return in the query results.
        top_k (int, optional): Number of neighbours requested; also used as
            the page size. Defaults to 20.
        vector_field (str, optional): Name of the vector field in the index.
            Defaults to "vector".
        sort (bool, optional): Whether to sort the results by
            ``vector_score``. Defaults to True.
        filters (str, optional): Filter expression placed in front of the
            KNN clause. Defaults to "*".

    Returns:
        Query: The configured query object (dialect 2, paging ``0..top_k``).
    """
    # Imported lazily so the module can be loaded without redis installed.
    from redis.commands.search.query import Query

    knn_expression = (
        f"{filters}=>[KNN {top_k} @{vector_field} $vector AS vector_score]"
    )

    query = Query(knn_expression)
    query = query.return_fields(*return_fields)
    query = query.dialect(2)
    query = query.paging(0, top_k)

    if not sort:
        return query

    query.sort_by("vector_score")
    return query
93

94

95
def convert_bytes(data: Any) -> Any:
    """Recursively decode every ``bytes`` value inside ``data`` to ``str``.

    Dicts (keys and values), lists and tuples are walked recursively and
    rebuilt as the same kind of container; any other object is returned
    unchanged.

    Args:
        data: A bytes object, a (possibly nested) dict/list/tuple, or any
            other value.

    Returns:
        The same structure with all ``bytes`` decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If a ``bytes`` value is not valid UTF-8.
    """
    if isinstance(data, bytes):
        # UTF-8 is a superset of ASCII, so everything that decoded before
        # still yields the same string, and non-ASCII text no longer fails.
        return data.decode("utf-8")
    if isinstance(data, dict):
        return {
            convert_bytes(key): convert_bytes(value)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [convert_bytes(item) for item in data]
    if isinstance(data, tuple):
        # Build a real tuple: a bare ``map`` object is a one-shot iterator
        # that cannot be indexed, compared or iterated twice.
        return tuple(convert_bytes(item) for item in data)
    return data
105

106

107
def array_to_buffer(array: List[float], dtype: Any = np.float32) -> bytes:
    """Serialize a sequence of numbers to raw bytes.

    Args:
        array: Values to serialize.
        dtype: NumPy dtype the values are cast to before serialization.
            Defaults to ``np.float32``.

    Returns:
        bytes: The raw buffer of the cast NumPy array.
    """
    as_ndarray = np.array(array)
    typed = as_ndarray.astype(dtype)
    return typed.tobytes()
109

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.