zephyr-7b-beta-rag-demo / app.py
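"""Gradio demo of retrieval-augmented generation (RAG) for pet-care questions:
a quantized Zephyr-7B-beta GGUF model served locally through ctransformers
answers queries using context fetched from a persisted Chroma vector store
built with BAAI/bge-large-en embeddings."""
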
from langchain import PromptTemplate, LLMChain
from langchain.llms import CTransformers
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from io import BytesIO
from langchain.document_loaders import PyPDFLoader
import gradio as gr


local_llm = "zephyr-7b-beta.Q5_K_S.gguf"

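# Generation settings for the local GGUF model; 'threads' is set to half of the
# available CPU cores for CPU-only inference.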
config = {
    'max_new_tokens': 1024,
    'repetition_penalty': 1.1,
    'temperature': 0.1,
    'top_k': 50,
    'top_p': 0.9,
    'stream': True,
    'threads': int(os.cpu_count() / 2)
}

llm = CTransformers(
    model=local_llm,
    model_type="mistral",
    lib="avx2",  # for CPU use
    **config
)

print("LLM Initialized...")


prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

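# BGE sentence embeddings from BAAI/bge-large-en, computed on CPU. Embedding
# normalization is left off, presumably matching how the persisted index in
# stores/pet_cosine was originally built.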
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)


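# Load the pre-built Chroma index from disk and retrieve only the single
# best-matching chunk (k=1) as context for each question.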
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
load_vector_store = Chroma(persist_directory="stores/pet_cosine", embedding_function=embeddings)
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
# query = "what is the fastest speed for a greyhound dog?"
# semantic_search = retriever.get_relevant_documents(query)
# print(semantic_search)

print("######################################################################")

chain_type_kwargs = {"prompt": prompt}

# qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=retriever,
#     return_source_documents=True,
#     chain_type_kwargs=chain_type_kwargs,
#     verbose=True
# )

# response = qa(query)

# print(response)

sample_prompts = ["what is the fastest speed for a greyhound dog?",
                  "Why should we not feed chocolates to the dogs?",
                  "Name two factors which might contribute to why some dogs might get scared?"]

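# Build a fresh "stuff"-type RetrievalQA chain for each request; the chain's full
# output dict (answer plus source documents) is what the Gradio text output displays.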
def get_response(input):
    query = input
    chain_type_kwargs = {"prompt": prompt}
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever,
                                     return_source_documents=True, chain_type_kwargs=chain_type_kwargs,
                                     verbose=True)
    response = qa(query)
    return response

input = gr.Text(
    label="Prompt",
    show_label=False,
    max_lines=1,
    placeholder="Enter your prompt",
    container=False,
)

# Note: allow_screenshot was dropped in Gradio 3.x and allow_flagging now expects
# "never" / "auto" / "manual"; on recent Gradio these two kwargs may only trigger warnings.
iface = gr.Interface(fn=get_response,
                     inputs=input,
                     outputs="text",
                     title="My Dog PetCare Bot",
                     description="This is a RAG implementation based on Zephyr 7B Beta LLM.",
                     examples=sample_prompts,
                     allow_screenshot=False,
                     allow_flagging=False)

iface.launch()
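# launch() serves the UI locally (Gradio's default is http://127.0.0.1:7860);
# pass share=True or server_port=... to change this.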