""" This example shows how to use 'Open Chat' inference models that expose an endpoint compatible with the
OpenAI API - using 'api_base' to configure the endpoint uri.

    For example, to integrate a model served on LM Studio with its standard configuration:
        -- api_base = 'http://localhost:1234/v1'

    Please also note that llmware implements llama.cpp directly, so you can run inference on any GGUF model
    very easily and natively in llmware - see the GGUF example in /Models/using_gguf.py """

from llmware.models import ModelCatalog
from llmware.prompts import Prompt
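
# as noted in the docstring above, llmware also runs GGUF models natively via llama.cpp - a minimal
# sketch, assuming a GGUF model available in the ModelCatalog (the model name below is illustrative -
# see /Models/using_gguf.py for the full example):
#
#   gguf_model = ModelCatalog().load_model("llmware/bling-phi-3-gguf")
#   print(gguf_model.inference("What is the capital of France?"))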
# one-step process: add the open chat model to the Model Registry
# key params:
#   model_name     = "my_open_chat_model1"
#   api_base       = uri_path to the proposed endpoint
#   prompt_wrapper = alpaca | <INST> | chat_ml | hf_chat | human_bot
#       <INST>    -> Llama2-Chat
#       hf_chat   -> Zephyr-Mistral
#       chat_ml   -> OpenHermes-Mistral
#       human_bot -> Dragon models
#   model_type     = "chat" (alternative: "completion")

ModelCatalog().register_open_chat_model("my_open_chat_model1",
                                        api_base="http://localhost:1234/v1",
                                        prompt_wrapper="<INST>",
                                        model_type="chat")

# once registered, you can invoke like any other model in llmware
prompter = Prompt().load_model("my_open_chat_model1")
response = prompter.prompt_main("What is the future of AI?")
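
# note: prompt_main generally returns a dictionary of response attributes - in recent llmware versions
# the generated text is under the "llm_response" key (a guarded lookup below, in case your version
# returns a plain string)

if isinstance(response, dict):
    print("llm response: ", response.get("llm_response"))
else:
    print("llm response: ", response)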
# you can (optionally) register multiple open chat models with different api_base and model attributes
ModelCatalog().register_open_chat_model("my_open_chat_model2",
                                        api_base="http://localhost:5678/v1",
                                        prompt_wrapper="hf_chat",
                                        model_type="chat")

# you can also alternate with OpenAI models - which will 'revert' to the default OpenAI api_base

openai_prompter = Prompt().load_model("gpt-3.5-turbo-instruct")
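
# note: loading an OpenAI model assumes a valid OpenAI API key is configured - depending on your setup,
# you can also pass it directly to load_model (parameter shown is an assumption - check your llmware
# version), e.g.:
#   openai_prompter = Prompt().load_model("gpt-3.5-turbo-instruct", api_key="<insert-your-key>")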
# if you list all of the models in the catalog, you will see the two newly registered open chat models

my_models = ModelCatalog().list_all_models()
for i, mods in enumerate(my_models):
    print("models: ", i, mods)
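
# assuming the second endpoint (port 5678) is also live, the newly registered model can be invoked the
# same way - a sketch, commented out since that endpoint may not be running:
#
#   prompter2 = Prompt().load_model("my_open_chat_model2")
#   response2 = prompter2.prompt_main("What are the benefits of open source AI models?")
#   print("response2: ", response2)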