Langchain-Chatchat
220 строк · 8.6 Кб
1from fastapi import Body2from configs import (DEFAULT_VS_TYPE, EMBEDDING_MODEL,3OVERLAP_SIZE,4logger, log_verbose, )5from server.knowledge_base.utils import (list_files_from_folder)6from sse_starlette import EventSourceResponse7import json8from server.knowledge_base.kb_service.base import KBServiceFactory9from typing import List, Optional10from server.knowledge_base.kb_summary.base import KBSummaryService11from server.knowledge_base.kb_summary.summary_chunk import SummaryAdapter12from server.utils import wrap_done, get_ChatOpenAI, BaseResponse13from configs import LLM_MODELS, TEMPERATURE14from server.knowledge_base.model.kb_document_model import DocumentWithVSId15
def recreate_summary_vector_store(
        knowledge_base_name: str = Body(..., examples=["samples"]),
        allow_empty_kb: bool = Body(True),
        vs_type: str = Body(DEFAULT_VS_TYPE),
        embed_model: str = Body(EMBEDDING_MODEL),
        file_description: str = Body(''),
        model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
        temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
        max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
):
    """
    Rebuild the per-file summary vector store for an entire knowledge base.

    Drops and recreates the KB's summary store, then summarizes every file
    in the KB folder, streaming progress as Server-Sent Events — one JSON
    string per file (code 200 on success, code 500 when a file fails and
    is skipped).

    :param knowledge_base_name: name of the knowledge base to process
    :param allow_empty_kb: when False, a missing KB produces a 404 event
    :param vs_type: vector store type
    :param embed_model: embedding model name
    :param file_description: extra description passed to the summarizer
    :param model_name: LLM model name
    :param temperature: LLM sampling temperature
    :param max_tokens: cap on generated tokens (None = model maximum)
    :return: EventSourceResponse streaming JSON progress messages
    """

    def output():
        kb = KBServiceFactory.get_service(knowledge_base_name, vs_type, embed_model)
        if not kb.exists() and not allow_empty_kb:
            # Serialize to a JSON string: every SSE event in this stream is a
            # string payload (a bare dict would be misread as SSE field kwargs).
            yield json.dumps({"code": 404, "msg": f"未找到知识库 ‘{knowledge_base_name}’"},
                             ensure_ascii=False)
        else:
            # Recreate the summary store from scratch.
            kb_summary = KBSummaryService(knowledge_base_name, embed_model)
            kb_summary.drop_kb_summary()
            kb_summary.create_kb_summary()

            llm = get_ChatOpenAI(
                model_name=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            reduce_llm = get_ChatOpenAI(
                model_name=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # Map-reduce text summarization adapter.
            summary = SummaryAdapter.form_summary(llm=llm,
                                                  reduce_llm=reduce_llm,
                                                  overlap_size=OVERLAP_SIZE)
            files = list_files_from_folder(knowledge_base_name)

            # enumerate() tracks the index itself; the original redundant
            # `i = 0` / `i += 1` bookkeeping has been removed.
            for i, file_name in enumerate(files):
                doc_infos = kb.list_docs(file_name=file_name)
                docs = summary.summarize(file_description=file_description,
                                         docs=doc_infos)

                status_kb_summary = kb_summary.add_kb_summary(summary_combine_docs=docs)
                if status_kb_summary:
                    logger.info(f"({i + 1} / {len(files)}): {file_name} 总结完成")
                    yield json.dumps({
                        "code": 200,
                        "msg": f"({i + 1} / {len(files)}): {file_name}",
                        "total": len(files),
                        "finished": i + 1,
                        "doc": file_name,
                    }, ensure_ascii=False)
                else:
                    # Log and report the failure, then continue with the next file.
                    msg = f"知识库'{knowledge_base_name}'总结文件‘{file_name}’时出错。已跳过。"
                    logger.error(msg)
                    yield json.dumps({
                        "code": 500,
                        "msg": msg,
                    }, ensure_ascii=False)

    return EventSourceResponse(output())
95
def summary_file_to_vector_store(
        knowledge_base_name: str = Body(..., examples=["samples"]),
        file_name: str = Body(..., examples=["test.pdf"]),
        allow_empty_kb: bool = Body(True),
        vs_type: str = Body(DEFAULT_VS_TYPE),
        embed_model: str = Body(EMBEDDING_MODEL),
        file_description: str = Body(''),
        model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
        temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
        max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
):
    """
    Summarize a single file of a knowledge base into its summary vector store.

    Streams the result as Server-Sent Events: one JSON string with code 200
    on success, or code 500 when summarizing the file fails.

    :param knowledge_base_name: name of the knowledge base
    :param file_name: name of the file inside the KB to summarize
    :param allow_empty_kb: when False, a missing KB produces a 404 event
    :param vs_type: vector store type
    :param embed_model: embedding model name
    :param file_description: extra description passed to the summarizer
    :param model_name: LLM model name
    :param temperature: LLM sampling temperature
    :param max_tokens: cap on generated tokens (None = model maximum)
    :return: EventSourceResponse streaming a JSON result message
    """

    def output():
        kb = KBServiceFactory.get_service(knowledge_base_name, vs_type, embed_model)
        if not kb.exists() and not allow_empty_kb:
            # Serialize to a JSON string: every SSE event in this stream is a
            # string payload (a bare dict would be misread as SSE field kwargs).
            yield json.dumps({"code": 404, "msg": f"未找到知识库 ‘{knowledge_base_name}’"},
                             ensure_ascii=False)
        else:
            # Ensure the summary store exists (create only; nothing is dropped).
            kb_summary = KBSummaryService(knowledge_base_name, embed_model)
            kb_summary.create_kb_summary()

            llm = get_ChatOpenAI(
                model_name=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            reduce_llm = get_ChatOpenAI(
                model_name=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # Map-reduce text summarization adapter.
            summary = SummaryAdapter.form_summary(llm=llm,
                                                  reduce_llm=reduce_llm,
                                                  overlap_size=OVERLAP_SIZE)

            doc_infos = kb.list_docs(file_name=file_name)
            docs = summary.summarize(file_description=file_description,
                                     docs=doc_infos)

            status_kb_summary = kb_summary.add_kb_summary(summary_combine_docs=docs)
            if status_kb_summary:
                logger.info(f" {file_name} 总结完成")
                yield json.dumps({
                    "code": 200,
                    "msg": f"{file_name} 总结完成",
                    "doc": file_name,
                }, ensure_ascii=False)
            else:
                msg = f"知识库'{knowledge_base_name}'总结文件‘{file_name}’时出错。已跳过。"
                logger.error(msg)
                yield json.dumps({
                    "code": 500,
                    "msg": msg,
                }, ensure_ascii=False)

    return EventSourceResponse(output())
168
def summary_doc_ids_to_vector_store(
        knowledge_base_name: str = Body(..., examples=["samples"]),
        doc_ids: List = Body([], examples=[["uuid"]]),
        vs_type: str = Body(DEFAULT_VS_TYPE),
        embed_model: str = Body(EMBEDDING_MODEL),
        file_description: str = Body(''),
        model_name: str = Body(LLM_MODELS[0], description="LLM 模型名称。"),
        temperature: float = Body(TEMPERATURE, description="LLM 采样温度", ge=0.0, le=1.0),
        max_tokens: Optional[int] = Body(None, description="限制LLM生成Token数量,默认None代表模型最大值"),
) -> BaseResponse:
    """
    Summarize a selection of documents of a knowledge base by their ids.

    Looks up the documents by vector-store id, runs the map-reduce
    summarizer over them, and returns the summaries in the response body
    (no summary store is written).

    :param knowledge_base_name: name of the knowledge base
    :param doc_ids: vector-store ids of the documents to summarize
    :param vs_type: vector store type
    :param embed_model: embedding model name
    :param file_description: extra description passed to the summarizer
    :param model_name: LLM model name
    :param temperature: LLM sampling temperature
    :param max_tokens: cap on generated tokens (None = model maximum)
    :return: BaseResponse with {"summarize": [...]} on success, 404 if the KB is missing
    """
    kb = KBServiceFactory.get_service(knowledge_base_name, vs_type, embed_model)
    # Guard clause: bail out early when the knowledge base does not exist.
    if not kb.exists():
        return BaseResponse(code=404, msg=f"未找到知识库 {knowledge_base_name}", data={})

    map_llm = get_ChatOpenAI(
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    reduce_llm = get_ChatOpenAI(
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # Map-reduce text summarization adapter.
    summarizer = SummaryAdapter.form_summary(llm=map_llm,
                                             reduce_llm=reduce_llm,
                                             overlap_size=OVERLAP_SIZE)

    fetched_docs = kb.get_doc_by_ids(ids=doc_ids)
    # Pair each fetched document with its vector-store id.
    docs_with_ids = [
        DocumentWithVSId(**doc.dict(), id=doc_id)
        for doc_id, doc in zip(doc_ids, fetched_docs)
    ]

    summarized = summarizer.summarize(file_description=file_description,
                                      docs=docs_with_ids)

    # Serialize the resulting documents to plain dicts for the response.
    payload = [doc.dict() for doc in summarized]

    return BaseResponse(code=200, msg="总结完成", data={"summarize": payload})