""" This example illustrates how to use Ollama models in llmware. It assumes that you have separately
downloaded and installed Ollama and used 'ollama run {model_name}' to cache several models in
your local Ollama instance before running this script. """
# NOTE(review): the original docstring was truncated mid-sentence and unterminated
# (a syntax error); the closing line above is a reconstruction - confirm wording.

from llmware.models import ModelCatalog

# Step 1 - register your Ollama models in llmware ModelCatalog
# -- these two lines will register: llama2 and mistral models
# -- note: assumes that you have previously cached and installed both of these models with ollama locally

# register llama2 - host/port shown explicitly here are the standard ollama defaults
ModelCatalog().register_ollama_model(model_name="llama2", model_type="chat", host="localhost", port=11434)

# register mistral - note: if you are using ollama defaults, then OK to register with ollama model name only
ModelCatalog().register_ollama_model(model_name="mistral")

# optional - confirm that model was registered
my_new_model_card = ModelCatalog().lookup_model_card("llama2")
print("\nupdate: confirming - new ollama model card - ", my_new_model_card)

# Step 2 - start using the Ollama model like any other model in llmware

print("\nupdate: calling ollama llama 2 model ...")

model = ModelCatalog().load_model("llama2")
response = model.inference("why is the sky blue?")

print("update: example #1 - ollama llama 2 response - ", response)

# Tip: if you are loading 'llama2' chat model from Ollama, note that it is already included in
# the llmware model catalog under a different name, "TheBloke/Llama-2-7B-Chat-GGUF"
# the llmware model name maps to the original HuggingFace repository, and is a nod to "TheBloke" who has
# led the popularization of GGUF - and is responsible for creating most of the GGUF model versions.
# --llmware uses the "Q4_K_M" model by default, while Ollama generally prefers "Q4_0"

print("\nupdate: calling Llama-2-7B-Chat-GGUF in llmware catalog ...")

model = ModelCatalog().load_model("TheBloke/Llama-2-7B-Chat-GGUF")
response = model.inference("why is the sky blue?")

print("update: example #1 - [compare] - llmware / Llama-2-7B-Chat-GGUF response - ", response)

# Now, let's try the Ollama Mistral model with a context passage

model2 = ModelCatalog().load_model("mistral")

context_passage= ("NASA’s rover Perseverance has gathered data confirming the existence of ancient lake "
                  "sediments deposited by water that once filled a giant basin on Mars called Jerezo Crater, "
                  "according to a study published on Friday. The findings from ground-penetrating radar "
                  "observations conducted by the robotic rover substantiate previous orbital imagery and "
                  "other data leading scientists to theorize that portions of Mars were once covered in water "
                  "and may have harbored microbial life. The research, led by teams from the University of "
                  "California at Los Angeles (UCLA) and the University of Oslo, was published in the "
                  "journal Science Advances. It was based on subsurface scans taken by the car-sized, six-wheeled "
                  "rover over several months of 2022 as it made its way across the Martian surface from the "
                  "crater floor onto an adjacent expanse of braided, sedimentary-like features resembling, "
                  "from orbit, the river deltas found on Earth.")

# announce the call *before* making it (previously this message printed after the
# inference had already completed, which was misleading while waiting on the model)
print("\nupdate: calling ollama mistral model ...")

response = model2.inference("What are the top 3 points?", add_context=context_passage)

print("update: example #2 - ollama mistral response - ", response)

# Step 3 - using the ollama discovery API - optional

discovery = model2.discover_models()
print("\nupdate: example #3 - checking ollama model manifest list: ", discovery)

# note: assumes that you have at least one model registered in ollama - otherwise, may throw error
for i, models in enumerate(discovery["models"]):
    print("ollama models: ", i, models)

# for more information and other alternatives for using GGUF models, please see the following examples:
# -- examples/Models/chat_gguf_fast_start.py
# -- examples/Models/using_gguf.py
# -- examples/Models/using-open-chat-models.py
# -- examples/Models/dragon-gguf_fast_start.py