llama-index
90 строк · 2.5 Кб
1from llama_index.legacy.constants import DATA_KEY, TYPE_KEY
2from llama_index.legacy.schema import (
3BaseNode,
4Document,
5ImageDocument,
6ImageNode,
7IndexNode,
8NodeRelationship,
9RelatedNodeInfo,
10TextNode,
11)
12
13
def doc_to_json(doc: BaseNode) -> dict:
    """Serialize a node into a two-entry dictionary.

    Args:
        doc: The node to serialize.

    Returns:
        A dict mapping ``DATA_KEY`` to the result of ``doc.dict()`` and
        ``TYPE_KEY`` to the result of ``doc.get_type()``.
    """
    payload = doc.dict()
    type_tag = doc.get_type()
    # The type tag is stored next to the payload so json_to_doc can pick
    # the matching class when reading the data back.
    return {DATA_KEY: payload, TYPE_KEY: type_tag}
19
20
def json_to_doc(doc_dict: dict) -> BaseNode:
    """Rebuild a node from the dictionary layout produced by doc_to_json.

    Args:
        doc_dict: Mapping that holds the type tag under ``TYPE_KEY`` and the
            node payload under ``DATA_KEY``.

    Returns:
        The node parsed from the payload. Payloads that contain an
        ``"extra_info"`` key are delegated to ``legacy_json_to_doc``.

    Raises:
        ValueError: If the type tag matches none of the supported classes.
    """
    doc_type = doc_dict[TYPE_KEY]
    data_dict = doc_dict[DATA_KEY]

    # An "extra_info" key marks the payload as the older layout.
    if "extra_info" in data_dict:
        return legacy_json_to_doc(doc_dict)

    # Checked in this order; the first class whose type tag matches wins.
    supported_classes = (Document, ImageDocument, TextNode, ImageNode, IndexNode)
    for node_cls in supported_classes:
        if doc_type == node_cls.get_type():
            return node_cls.parse_obj(data_dict)

    raise ValueError(f"Unknown doc type: {doc_type}")
43
44
def legacy_json_to_doc(doc_dict: dict) -> BaseNode:
    """Rebuild a node from the legacy serialized layout.

    TODO: Deprecated legacy support for old node versions.

    The payload under ``DATA_KEY`` is read through these keys: ``"text"``
    (default ``""``), ``"extra_info"`` (used as metadata; a missing or falsy
    value becomes ``{}``), ``"doc_id"``, ``"relationships"``, and, depending
    on the type, ``"image"`` or ``"index_id"``.

    Args:
        doc_dict: Mapping that holds the type tag under ``TYPE_KEY`` and the
            legacy payload under ``DATA_KEY``.

    Returns:
        A ``Document``, ``TextNode``, ``ImageNode`` or ``IndexNode`` built
        from the payload, chosen by the type tag.

    Raises:
        ValueError: If the type tag matches none of the four classes above.
    """
    doc_type = doc_dict[TYPE_KEY]
    data_dict = doc_dict[DATA_KEY]

    # Arguments shared by every supported class. The relationships mapping is
    # converted before the type dispatch, so a bad relationship key fails
    # ahead of the unknown-type check.
    # NOTE(review): the identifier is passed under the keyword ``id`` —
    # confirm the node constructors accept that name (rather than e.g.
    # ``id_``); otherwise the stored "doc_id" would not be applied.
    shared_kwargs = {
        "text": data_dict.get("text", ""),
        "metadata": data_dict.get("extra_info", {}) or {},
        "id": data_dict.get("doc_id", None),
        "relationships": {
            NodeRelationship(rel_key): RelatedNodeInfo(node_id=target_id)
            for rel_key, target_id in data_dict.get("relationships", {}).items()
        },
    }

    if doc_type == Document.get_type():
        return Document(**shared_kwargs)

    if doc_type == TextNode.get_type():
        return TextNode(**shared_kwargs)

    if doc_type == ImageNode.get_type():
        return ImageNode(**shared_kwargs, image=data_dict.get("image", None))

    if doc_type == IndexNode.get_type():
        return IndexNode(
            **shared_kwargs,
            index_id=data_dict.get("index_id", None),
        )

    raise ValueError(f"Unknown doc type: {doc_type}")
91