""" This example illustrates how to use Ollama models in llmware. It assumes that you have separately
downloaded and installed Ollama and used 'ollama run {model_name}' to cache several models in
your local Ollama instance before running this script. """
# NOTE(review): the original docstring was truncated mid-sentence and unterminated
# (a syntax error); the closing line above is a reconstruction - confirm wording.

from llmware.models import ModelCatalog

# Step 1 - register your Ollama models in llmware ModelCatalog
# -- these two lines will register: llama2 and mistral models
# -- note: assumes that you have previously cached and installed both of these models with ollama locally

# register llama2 - host/port shown explicitly here are the standard ollama defaults
ModelCatalog().register_ollama_model(model_name="llama2", model_type="chat", host="localhost", port=11434)

# register mistral - note: if you are using ollama defaults, then OK to register with ollama model name only
ModelCatalog().register_ollama_model(model_name="mistral")

# optional - confirm that model was registered
my_new_model_card = ModelCatalog().lookup_model_card("llama2")
print("\nupdate: confirming - new ollama model card - ", my_new_model_card)

# Step 2 - start using the Ollama model like any other model in llmware

print("\nupdate: calling ollama llama 2 model ...")

model = ModelCatalog().load_model("llama2")
response = model.inference("why is the sky blue?")

print("update: example #1 - ollama llama 2 response - ", response)

# Tip: if you are loading 'llama2' chat model from Ollama, note that it is already included in
# the llmware model catalog under a different name, "TheBloke/Llama-2-7B-Chat-GGUF"
# the llmware model name maps to the original HuggingFace repository, and is a nod to "TheBloke" who has
# led the popularization of GGUF - and is responsible for creating most of the GGUF model versions.
# --llmware uses the "Q4_K_M" model by default, while Ollama generally prefers "Q4_0"

print("\nupdate: calling Llama-2-7B-Chat-GGUF in llmware catalog ...")

model = ModelCatalog().load_model("TheBloke/Llama-2-7B-Chat-GGUF")
response = model.inference("why is the sky blue?")

print("update: example #1 - [compare] - llmware / Llama-2-7B-Chat-GGUF response - ", response)

# Now, let's try the Ollama Mistral model with a context passage

model2 = ModelCatalog().load_model("mistral")

context_passage= ("NASA’s rover Perseverance has gathered data confirming the existence of ancient lake "
                  "sediments deposited by water that once filled a giant basin on Mars called Jerezo Crater, "
                  "according to a study published on Friday. The findings from ground-penetrating radar "
                  "observations conducted by the robotic rover substantiate previous orbital imagery and "
                  "other data leading scientists to theorize that portions of Mars were once covered in water "
                  "and may have harbored microbial life. The research, led by teams from the University of "
                  "California at Los Angeles (UCLA) and the University of Oslo, was published in the "
                  "journal Science Advances. It was based on subsurface scans taken by the car-sized, six-wheeled "
                  "rover over several months of 2022 as it made its way across the Martian surface from the "
                  "crater floor onto an adjacent expanse of braided, sedimentary-like features resembling, "
                  "from orbit, the river deltas found on Earth.")

# announce the call *before* making it (previously this message printed after the
# inference had already completed, which was misleading while waiting on the model)
print("\nupdate: calling ollama mistral model ...")

response = model2.inference("What are the top 3 points?", add_context=context_passage)

print("update: example #2 - ollama mistral response - ", response)

# Step 3 - using the ollama discovery API - optional

discovery = model2.discover_models()
print("\nupdate: example #3 - checking ollama model manifest list: ", discovery)

# note: assumes that you have at least one model registered in ollama - otherwise, may throw error
for i, models in enumerate(discovery["models"]):
    print("ollama models: ", i, models)

# for more information and other alternatives for using GGUF models, please see the following examples:
# -- examples/Models/chat_gguf_fast_start.py
# -- examples/Models/using_gguf.py
# -- examples/Models/using-open-chat-models.py
# -- examples/Models/dragon-gguf_fast_start.py