Langchain-Chatchat

Форк
0
/
knowledge_file_repository.py 
198 строк · 7.5 Кб
1
from server.db.models.knowledge_base_model import KnowledgeBaseModel
2
from server.db.models.knowledge_file_model import KnowledgeFileModel, FileDocModel
3
from server.db.session import with_session
4
from server.knowledge_base.utils import KnowledgeFile
5
from typing import List, Dict
6

7

8
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(session,
                                                   kb_name: str,
                                                   file_name: str,
                                                   ) -> List[int]:
    '''
    Return the ids of every Document recorded for one file of a knowledge base.

    Rows are selected by equality on kb_name and file_name, and each stored
    doc_id is converted with int() before being returned.
    Return form: [int, ...]
    '''
    rows = (
        session.query(FileDocModel.doc_id)
        .filter_by(kb_name=kb_name, file_name=file_name)
        .all()
    )
    ids = []
    for row in rows:
        # Each row is a one-element result holding the doc_id column.
        ids.append(int(row[0]))
    return ids
19

20

21
@with_session
def list_docs_from_db(session,
                      kb_name: str,
                      file_name: str = None,
                      metadata: Dict = None,
                      ) -> List[Dict]:
    '''
    List the Documents recorded for a knowledge base, optionally narrowed to one file.

    kb_name (and file_name, when given) are matched with ilike. Each key/value
    pair in metadata adds a filter requiring the value stored under that key in
    the row's meta_data column to equal str(value). Passing None or an empty
    dict for metadata applies no metadata filter.
    Return form: [{"id": str, "metadata": dict}, ...]
    '''
    docs = session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(kb_name))
    if file_name:
        docs = docs.filter(FileDocModel.file_name.ilike(file_name))
    # A None default avoids sharing one mutable dict object across calls.
    for k, v in (metadata or {}).items():
        docs = docs.filter(FileDocModel.meta_data[k].as_string() == str(v))

    # Read the meta_data column (the one written by add_docs_to_db). On a
    # SQLAlchemy declarative model, `.metadata` normally resolves to the
    # MetaData registry of the mapped tables, not to the per-row value.
    return [{"id": x.doc_id, "metadata": x.meta_data} for x in docs.all()]
38

39

40
@with_session
def delete_docs_from_db(session,
                        kb_name: str,
                        file_name: str = None,
                        ) -> List[Dict]:
    '''
    Delete the Documents recorded for a knowledge base (only those of one file
    when file_name is given) and return the Documents that were listed before
    the deletion.
    Return form: [{"id": str, "metadata": dict}, ...]
    '''
    # Capture the matching Documents first so they can be returned afterwards.
    removed = list_docs_from_db(kb_name=kb_name, file_name=file_name)

    criteria = [FileDocModel.kb_name.ilike(kb_name)]
    if file_name:
        criteria.append(FileDocModel.file_name.ilike(file_name))

    # Multiple criteria passed to filter() are combined with AND.
    session.query(FileDocModel).filter(*criteria).delete(synchronize_session=False)
    session.commit()
    return removed
56

57

58
@with_session
def add_docs_to_db(session,
                   kb_name: str,
                   file_name: str,
                   doc_infos: List[Dict]):
    '''
    Add the Document records of one knowledge-base file to the session.
    doc_infos form: [{"id": str, "metadata": dict}, ...]

    Returns False (after printing a notice) when doc_infos is None, True otherwise.
    '''
    # NOTE: doc_infos sometimes arrives as None; the cause still needs investigation.
    if doc_infos is None:
        print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
        return False

    for info in doc_infos:
        session.add(FileDocModel(
            kb_name=kb_name,
            file_name=file_name,
            doc_id=info["id"],
            meta_data=info["metadata"],
        ))
    return True
80

81

82
@with_session
def count_files_from_db(session, kb_name: str) -> int:
    '''
    Count the file records whose kb_name matches (ilike) the given name.
    '''
    kb_filter = KnowledgeFileModel.kb_name.ilike(kb_name)
    matching = session.query(KnowledgeFileModel).filter(kb_filter)
    return matching.count()
85

86

87
@with_session
def list_files_from_db(session, kb_name):
    '''
    Return the file_name of every file record whose kb_name matches (ilike)
    the given name.
    '''
    query = session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(kb_name))
    return [record.file_name for record in query.all()]
92

93

94
@with_session
def add_file_to_db(session,
                   kb_file: KnowledgeFile,
                   docs_count: int = 0,
                   custom_docs: bool = False,
                   doc_infos: List[Dict] = [],  # form: [{"id": str, "metadata": dict}, ...]
                   ):
    '''
    Create or refresh the database record of a knowledge-base file, then
    register its Document infos via add_docs_to_db.

    Nothing is written when no knowledge base with kb_file.kb_name is found.
    Always returns True.
    '''
    kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
    if not kb:
        return True

    record: KnowledgeFileModel = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
                KnowledgeFileModel.file_name.ilike(kb_file.filename))
        .first()
    )
    mtime = kb_file.get_mtime()
    size = kb_file.get_size()

    if not record:
        # No record yet: create one and count it on the knowledge base.
        created = KnowledgeFileModel(
            file_name=kb_file.filename,
            file_ext=kb_file.ext,
            kb_name=kb_file.kb_name,
            document_loader_name=kb_file.document_loader_name,
            text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
            file_mtime=mtime,
            file_size=size,
            docs_count=docs_count,
            custom_docs=custom_docs,
        )
        kb.file_count += 1
        session.add(created)
    else:
        # The file is already recorded: refresh its info and bump the version.
        record.file_mtime = mtime
        record.file_size = size
        record.docs_count = docs_count
        record.custom_docs = custom_docs
        record.file_version += 1

    add_docs_to_db(kb_name=kb_file.kb_name, file_name=kb_file.filename, doc_infos=doc_infos)
    return True
134

135

136
@with_session
def delete_file_from_db(session, kb_file: KnowledgeFile):
    '''
    Delete a file's record together with its Document records, then decrement
    file_count on the matching knowledge base if one is found.

    Always returns True, including when no matching file record exists.
    '''
    record = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
                KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
        .first()
    )
    if not record:
        return True

    session.delete(record)
    delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
    session.commit()

    owner = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_file.kb_name))
        .first()
    )
    if owner:
        owner.file_count -= 1
        session.commit()
    return True
152

153

154
@with_session
def delete_files_from_db(session, knowledge_base_name: str):
    '''
    Bulk-delete every file record and Document record whose kb_name matches
    (ilike) the given knowledge base name, and reset that knowledge base's
    file_count to 0 when it exists.

    Always returns True.
    '''
    # File records are removed first, then Document records.
    for model in (KnowledgeFileModel, FileDocModel):
        (session.query(model)
         .filter(model.kb_name.ilike(knowledge_base_name))
         .delete(synchronize_session=False))

    kb = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name))
        .first()
    )
    if kb:
        kb.file_count = 0

    session.commit()
    return True
166

167

168
@with_session
def file_exists_in_db(session, kb_file: KnowledgeFile):
    '''
    Tell whether a file record matching kb_file's filename and kb_name (both
    compared with ilike) exists.
    '''
    name_matches = KnowledgeFileModel.file_name.ilike(kb_file.filename)
    kb_matches = KnowledgeFileModel.kb_name.ilike(kb_file.kb_name)
    found = session.query(KnowledgeFileModel).filter(name_matches, kb_matches).first()
    return bool(found)
175

176

177
@with_session
def get_file_detail(session, kb_name: str, filename: str) -> dict:
    '''
    Return the stored attributes of one knowledge-base file as a dict.

    The record is looked up by filename and kb_name (both compared with
    ilike); an empty dict is returned when no record matches.
    '''
    record: KnowledgeFileModel = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.file_name.ilike(filename),
                KnowledgeFileModel.kb_name.ilike(kb_name))
        .first()
    )
    if not record:
        return {}

    detail = {
        "kb_name": record.kb_name,
        "file_name": record.file_name,
        "file_ext": record.file_ext,
        "file_version": record.file_version,
        "document_loader": record.document_loader_name,
        "text_splitter": record.text_splitter_name,
        "create_time": record.create_time,
        "file_mtime": record.file_mtime,
        "file_size": record.file_size,
        "custom_docs": record.custom_docs,
        "docs_count": record.docs_count,
    }
    return detail
199

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.