# rag-chatbot-2
# 30 lines · 1011.0 bytes
import time
from pathlib import Path

from exp_lama_cpp.model import Model, get_model_setting

# Build llama-cpp-python with CUDA (cuBLAS) acceleration:
# CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python

if __name__ == "__main__":
    # Project root is three directory levels above this file.
    root_folder = Path(__file__).resolve().parent.parent.parent
    model_folder = root_folder / "models"
    # Ensure the models directory itself exists. The original called
    # Path(model_folder).parent.mkdir(...), which created only the *parent*
    # (the project root) and left "models/" missing; mkdir the target instead.
    model_folder.mkdir(parents=True, exist_ok=True)

    model_settings = get_model_setting("stablelm-zephyr")
    llm = Model(model_folder, model_settings)

    # --- Blocking (non-streaming) generation: full answer, then print ---
    start_time = time.time()
    prompt = llm.generate_summarization_prompt(text="<put the text here>")
    output = llm.generate_answer(prompt, max_new_tokens=512)
    print(output)
    took = time.time() - start_time
    print(f"\n--- Took {took:.2f} seconds ---")

    # --- Streaming generation: print each token chunk as it arrives ---
    start_time = time.time()
    stream = llm.start_answer_iterator_streamer(prompt, max_new_tokens=256)
    for chunk in stream:
        # Each chunk follows the llama-cpp-python completion schema:
        # the generated text lives under choices[0]["text"].
        print(chunk["choices"][0]["text"], end="", flush=True)
    took = time.time() - start_time

    print(f"\n--- Took {took:.2f} seconds ---")
31