# Langchain-Chatchat
# 198 lines · 7.5 KB
1from server.db.models.knowledge_base_model import KnowledgeBaseModel2from server.db.models.knowledge_file_model import KnowledgeFileModel, FileDocModel3from server.db.session import with_session4from server.knowledge_base.utils import KnowledgeFile5from typing import List, Dict6
7
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(session,
                                                   kb_name: str,
                                                   file_name: str,
                                                   ) -> List[int]:
    '''
    List the ids of every Document recorded for one file of a knowledge base.

    Rows are selected by exact equality on kb_name and file_name. Each stored
    doc_id is passed through int(), so the result has the form [int, ...].

    session: database session; callers in this module omit it, so it is
    presumably injected by @with_session.
    '''
    rows = (
        session.query(FileDocModel.doc_id)
        .filter_by(kb_name=kb_name, file_name=file_name)
        .all()
    )
    # Every row is a one-element tuple holding just the doc_id column.
    return [int(doc_id) for (doc_id,) in rows]
20
@with_session
def list_docs_from_db(session,
                      kb_name: str,
                      file_name: str = None,
                      metadata: Dict = None,
                      ) -> List[Dict]:
    '''
    List the Documents recorded for a knowledge base, optionally for one file.

    Args:
        session: database session; callers in this module omit it, so it is
            presumably injected by @with_session.
        kb_name: knowledge base name, matched with ILIKE (case-insensitive;
            "%" and "_" in the name act as LIKE wildcards).
        file_name: when truthy, only Documents of this file are returned
            (also matched with ILIKE).
        metadata: optional key/value pairs. A Document is kept only if, for
            every pair, its stored metadata value at that key, read as a
            string, equals str(value). None or {} applies no metadata filter.

    Returns:
        [{"id": <doc_id>, "metadata": <stored metadata>}, ...]
    '''
    query = session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(kb_name))
    if file_name:
        query = query.filter(FileDocModel.file_name.ilike(file_name))
    for key, value in (metadata or {}).items():
        query = query.filter(FileDocModel.meta_data[key].as_string() == str(value))

    # The per-row JSON lives in the `meta_data` column. On a declarative model
    # `.metadata` is SQLAlchemy's table-registry MetaData object, so reading it
    # would return that registry instead of the Document's metadata.
    return [{"id": row.doc_id, "metadata": row.meta_data} for row in query.all()]
39
@with_session
def delete_docs_from_db(session,
                        kb_name: str,
                        file_name: str = None,
                        ) -> List[Dict]:
    '''
    Delete the Documents recorded for a knowledge base (optionally only those
    of one file) and return the records that were deleted.

    Names are matched with ILIKE. The return value is whatever
    list_docs_from_db yields for the same arguments before the deletion:
    [{"id": str, "metadata": dict}, ...]
    '''
    # Snapshot the records first so they can still be returned afterwards.
    removed = list_docs_from_db(kb_name=kb_name, file_name=file_name)

    conditions = [FileDocModel.kb_name.ilike(kb_name)]
    if file_name:
        conditions.append(FileDocModel.file_name.ilike(file_name))

    # Multiple criteria passed to filter() are combined with AND.
    session.query(FileDocModel).filter(*conditions).delete(synchronize_session=False)
    session.commit()
    return removed
57
@with_session
def add_docs_to_db(session,
                   kb_name: str,
                   file_name: str,
                   doc_infos: List[Dict]):
    '''
    Record the Document entries of one knowledge base file in the database.

    doc_infos has the form [{"id": str, "metadata": dict}, ...]; one
    FileDocModel row is added to the session per entry.

    Returns False (after printing a diagnostic) when doc_infos is None,
    otherwise True. No commit is issued here.
    '''
    # NOTE: doc_infos has been seen to arrive as None; the cause still needs
    # further investigation.
    if doc_infos is None:
        print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
        return False

    for info in doc_infos:
        session.add(
            FileDocModel(
                kb_name=kb_name,
                file_name=file_name,
                doc_id=info["id"],
                meta_data=info["metadata"],
            )
        )
    return True
81
@with_session
def count_files_from_db(session, kb_name: str) -> int:
    '''
    Return how many file records have a kb_name matching kb_name (ILIKE).
    '''
    matching_files = session.query(KnowledgeFileModel).filter(
        KnowledgeFileModel.kb_name.ilike(kb_name)
    )
    return matching_files.count()
86
@with_session
def list_files_from_db(session, kb_name):
    '''
    Return the file_name of every file record whose kb_name matches kb_name
    (ILIKE).
    '''
    records = (
        session.query(KnowledgeFileModel)
        .filter(KnowledgeFileModel.kb_name.ilike(kb_name))
        .all()
    )
    return [record.file_name for record in records]
93
@with_session
def add_file_to_db(session,
                   kb_file: KnowledgeFile,
                   docs_count: int = 0,
                   custom_docs: bool = False,
                   doc_infos: List[Dict] = [],  # form: [{"id": str, "metadata": dict}, ...]
                   ):
    '''
    Create or update the database record of a knowledge base file.

    The knowledge base is looked up by exact kb_name; if it is not found,
    nothing is written. Otherwise:
      * an existing file record (matched with ILIKE on kb_name and file_name)
        gets its mtime, size, docs_count and custom_docs refreshed and its
        file_version incremented;
      * a missing one is created and the knowledge base's file_count is
        incremented.
    In both cases doc_infos is then forwarded to add_docs_to_db.

    Always returns True.
    '''
    kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
    if not kb:
        return True

    existing_file: KnowledgeFileModel = (
        session.query(KnowledgeFileModel)
        .filter(
            KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
            KnowledgeFileModel.file_name.ilike(kb_file.filename),
        )
        .first()
    )
    mtime = kb_file.get_mtime()
    size = kb_file.get_size()

    if not existing_file:
        # First time this file is seen: create its record.
        new_file = KnowledgeFileModel(
            file_name=kb_file.filename,
            file_ext=kb_file.ext,
            kb_name=kb_file.kb_name,
            document_loader_name=kb_file.document_loader_name,
            text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
            file_mtime=mtime,
            file_size=size,
            docs_count=docs_count,
            custom_docs=custom_docs,
        )
        kb.file_count += 1
        session.add(new_file)
    else:
        # The file is already known: refresh its details and bump the version.
        existing_file.file_mtime = mtime
        existing_file.file_size = size
        existing_file.docs_count = docs_count
        existing_file.custom_docs = custom_docs
        existing_file.file_version += 1

    add_docs_to_db(kb_name=kb_file.kb_name, file_name=kb_file.filename, doc_infos=doc_infos)
    return True
135
@with_session
def delete_file_from_db(session, kb_file: KnowledgeFile):
    '''
    Remove a knowledge base file's record together with its Document records.

    The file is matched with ILIKE on file_name and kb_name. When it exists,
    the record and its Documents are deleted and committed, then the owning
    knowledge base's file_count (if the knowledge base is found) is
    decremented and committed. When it does not exist nothing is changed.

    Always returns True.
    '''
    existing_file = (
        session.query(KnowledgeFileModel)
        .filter(
            KnowledgeFileModel.file_name.ilike(kb_file.filename),
            KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
        )
        .first()
    )
    if not existing_file:
        return True

    session.delete(existing_file)
    delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
    session.commit()

    kb = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(kb_file.kb_name))
        .first()
    )
    if kb:
        kb.file_count -= 1
        session.commit()
    return True
153
@with_session
def delete_files_from_db(session, knowledge_base_name: str):
    '''
    Delete every file record and every Document record of a knowledge base
    (name matched with ILIKE), reset the knowledge base's file_count to 0 if
    the knowledge base is found, and commit.

    Always returns True.
    '''
    file_rows = session.query(KnowledgeFileModel).filter(
        KnowledgeFileModel.kb_name.ilike(knowledge_base_name)
    )
    file_rows.delete(synchronize_session=False)

    doc_rows = session.query(FileDocModel).filter(
        FileDocModel.kb_name.ilike(knowledge_base_name)
    )
    doc_rows.delete(synchronize_session=False)

    kb = (
        session.query(KnowledgeBaseModel)
        .filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name))
        .first()
    )
    if kb:
        kb.file_count = 0

    session.commit()
    return True
167
@with_session
def file_exists_in_db(session, kb_file: KnowledgeFile):
    '''
    Tell whether a record exists for kb_file, matching file_name and kb_name
    with ILIKE. Returns True or False.
    '''
    match = (
        session.query(KnowledgeFileModel)
        .filter(
            KnowledgeFileModel.file_name.ilike(kb_file.filename),
            KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
        )
        .first()
    )
    return bool(match)
176
@with_session
def get_file_detail(session, kb_name: str, filename: str) -> dict:
    '''
    Return the stored details of one knowledge base file as a plain dict.

    The record is matched with ILIKE on file_name and kb_name. An empty dict
    is returned when no record matches.
    '''
    file: KnowledgeFileModel = (
        session.query(KnowledgeFileModel)
        .filter(
            KnowledgeFileModel.file_name.ilike(filename),
            KnowledgeFileModel.kb_name.ilike(kb_name),
        )
        .first()
    )
    if not file:
        return {}

    detail = {
        "kb_name": file.kb_name,
        "file_name": file.file_name,
        "file_ext": file.file_ext,
        "file_version": file.file_version,
        "document_loader": file.document_loader_name,
        "text_splitter": file.text_splitter_name,
        "create_time": file.create_time,
        "file_mtime": file.file_mtime,
        "file_size": file.file_size,
        "custom_docs": file.custom_docs,
        "docs_count": file.docs_count,
    }
    return detail