llama-index

reader.py
116 строк · 3.9 Кб
Перенос по словам
1
"""Weaviate reader."""
2

3
from typing import Any, List, Optional
4

5
from llama_index.legacy.readers.base import BaseReader
6
from llama_index.legacy.schema import Document
7

8

9
class WeaviateReader(BaseReader):
    """Weaviate reader.

    Retrieves objects from Weaviate by running a GraphQL ``Get`` query and
    converts them to Documents. Allows option to concatenate retrieved
    documents into one Document, or to return separate Document objects per
    document.

    Args:
        host (str): Passed as the first positional argument to
            ``weaviate.Client`` (presumably the URL of the Weaviate
            instance -- confirm against the client documentation).
        auth_client_secret (Optional[weaviate.auth.AuthCredentials]):
            Forwarded to ``weaviate.Client`` as ``auth_client_secret``.
    """

    def __init__(
        self,
        host: str,
        auth_client_secret: Optional[Any] = None,
    ) -> None:
        """Initialize with parameters.

        Raises:
            ImportError: If the ``weaviate`` package cannot be imported.
        """
        # Imported lazily so that the module can be imported without the
        # optional `weaviate-client` dependency installed.
        try:
            import weaviate  # noqa
            from weaviate import Client
            from weaviate.auth import AuthCredentials  # noqa
        except ImportError as err:
            # Chain the original error so the real import failure stays
            # visible in the traceback.
            raise ImportError(
                "`weaviate` package not found, please run `pip install weaviate-client`"
            ) from err

        self.client: Client = Client(host, auth_client_secret=auth_client_secret)

    def load_data(
        self,
        class_name: Optional[str] = None,
        properties: Optional[List[str]] = None,
        graphql_query: Optional[str] = None,
        separate_documents: Optional[bool] = True,
    ) -> List[Document]:
        """Load data from Weaviate.

        If both `class_name` and `properties` are provided, a `Get` query is
        built from them and any `graphql_query` argument is ignored.
        Otherwise `graphql_query` is sent unchanged.

        Args:
            class_name (Optional[str]): class_name to retrieve documents from.
                When omitted, the first class found in the `Get` response
                is used.
            properties (Optional[List[str]]): properties to retrieve from documents.
            graphql_query (Optional[str]): Raw GraphQL Query.
                We assume that the query is a Get query.
            separate_documents (Optional[bool]): Whether to return separate
                documents. Defaults to True. When falsy, all entries are
                merged into a single Document, which carries no embedding.

        Returns:
            List[Document]: A list of documents. Each entry becomes one
                Document whose text is its `<property>: <value>` pairs, one
                per line; `_additional.vector`, if present, becomes the
                Document embedding.

        Raises:
            ValueError: If neither (`class_name` and `properties`) nor
                `graphql_query` is given, if the response reports errors,
                if the response has no `Get` section, or if `class_name`
                has to be inferred but the `Get` section is empty.

        """
        if class_name is not None and properties is not None:
            props_txt = "\n".join(properties)
            graphql_query = f"""
            {{
                Get {{
                    {class_name} {{
                        {props_txt}
                    }}
                }}
            }}
            """
        elif graphql_query is None:
            raise ValueError(
                "Either `class_name` and `properties` must be specified, "
                "or `graphql_query` must be specified."
            )

        response = self.client.query.raw(graphql_query)
        if "errors" in response:
            raise ValueError(f"Invalid query, got errors: {response['errors']}")

        data_response = response["data"]
        if "Get" not in data_response:
            raise ValueError("Invalid query response, must be a Get query.")
        get_response = data_response["Get"]

        if class_name is None:
            # infer class_name if only graphql_query was provided; an empty
            # `Get` section must not leak a bare StopIteration to the caller
            class_name = next(iter(get_response), None)
            if class_name is None:
                raise ValueError(
                    "Invalid query response, `Get` section contains no classes."
                )
        entries = get_response[class_name]

        documents = []
        for entry in entries:
            embedding: Optional[List[float]] = None
            # for each entry, join properties into <property>: <value>
            # separated by newlines
            property_lines = []
            for key, value in entry.items():
                if key == "_additional":
                    # `_additional` is metadata, not text; only the vector
                    # is kept, as the document embedding
                    if "vector" in value:
                        embedding = value["vector"]
                    continue
                property_lines.append(f"{key}: {value}")

            documents.append(
                Document(text="\n".join(property_lines), embedding=embedding)
            )

        if not separate_documents:
            # join all documents into one (per-entry embeddings are dropped)
            combined_text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=combined_text)]

        return documents
117
llama-index

Использование cookies