""" This example shows how to use any GGUF model available on HuggingFace, and start using it in inferences
and workflows with llmware. In this scenario, we will take the following steps:

    1. Register new GGUF model
    2. Register new finetune wrapper, if needed
    3. Start running inferences
"""

import re
import time

from llmware.models import ModelCatalog
from llmware.prompts import Prompt

# Step 1 - register new gguf model - we will pick the popular Llama-2-13B-chat-GGUF

ModelCatalog().register_gguf_model(model_name="TheBloke/Llama-2-13B-chat-GGUF-Q2",
                                   gguf_model_repo="TheBloke/Llama-2-13B-chat-GGUF",
                                   gguf_model_file_name="llama-2-13b-chat.Q2_K.gguf",
                                   prompt_wrapper="my_version_inst")
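
# note: registering adds the model entry to the ModelCatalog - as a general rule, the .gguf
# file itself is pulled from the HuggingFace repo when the model is first loaded, not at
# registration time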

# Step 2 - if the prompt_wrapper is a standard, e.g., Meta's <INST>, then no need to do anything else
# -- however, if the model uses a custom prompt wrapper, then we need to define that too
# -- in this case, we are going to create our "own version" of the Meta <INST> wrapper

ModelCatalog().register_new_finetune_wrapper("my_version_inst", main_start="<INST>", llm_start="</INST>")
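
# with this wrapper registered, each prompt should be wrapped (approximately - the exact
# assembly happens inside llmware) as:  <INST> + prompt text + </INST>, with the model's
# completion following the </INST> tag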

# Once we have completed these two steps, we are done - and can begin to use the model like any other

prompter = Prompt().load_model("TheBloke/Llama-2-13B-chat-GGUF-Q2")
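
# note: if the gguf file is not already in the local model repository, it will generally be
# downloaded at load time - llama-2-13b-chat.Q2_K.gguf is a roughly 5 GB download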

question_list = ["I am interested in gaining an understanding of the banking industry. What topics should I research?",
                 "What are some tips for creating a successful business plan?",
                 "What are the best books to read for a class on American literature?"]

for i, entry in enumerate(question_list):

    start_time = time.time()

    print(f"query - {i + 1} - {entry}")

    response = prompter.prompt_main(entry)

    time_taken = round(time.time() - start_time, 2)

    # collapse runs of consecutive newlines into a single newline for cleaner printing
    llm_response = re.sub("\n{2,}", "\n", response["llm_response"])

    print(f"llm_response - {i + 1} - {llm_response}")
    print(f"time_taken - {i + 1} - {time_taken}")