"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop"""

import time

from datasets import load_dataset

from llmware.prompts import Prompt
# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo
def load_rag_benchmark_tester_dataset():
    """Download the llmware RAG benchmark test set from HuggingFace.

    Returns:
        list[dict]: the samples from the dataset's "train" split.  Each
        sample is expected to carry 'query', 'context' and 'answer' keys
        (consumed by run_test below) — TODO confirm against the dataset card.
    """

    dataset_name = "llmware/rag_instruct_benchmark_tester"
    print(f"\n > Loading RAG dataset '{dataset_name}'...")

    dataset = load_dataset(dataset_name)

    # Fix: test_set was never initialized (NameError on first append) and
    # the function never returned it, so the caller received None.
    test_set = [samples for samples in dataset["train"]]

    return test_set
# Run the benchmark test
def run_test(model_name, prompt_list):
    """Run the RAG benchmark entries in *prompt_list* against *model_name*.

    Args:
        model_name: name of a GGUF model loadable via Prompt().load_model().
        prompt_list: iterable of dicts with 'query', 'context' and 'answer'
            keys (as produced by load_rag_benchmark_tester_dataset).

    Returns:
        list: the per-question response dicts from prompt_main.  Fix: the
        function previously returned None even though the caller captured
        its result (``output = run_test(...)``).
    """

    print(f"\n > Loading model '{model_name}'")
    prompter = Prompt().load_model(model_name)

    print(f"\n > Running RAG Benchmark Test against '{model_name}' - 200 questions")

    responses = []
    for i, entry in enumerate(prompt_list):

        # Time each individual inference call (requires the module-level
        # ``import time`` — previously used without being imported).
        start_time = time.time()

        prompt = entry["query"]
        context = entry["context"]
        response = prompter.prompt_main(prompt, context=context, prompt_name="default_with_context", temperature=0.3)

        time_taken = round(time.time() - start_time, 2)

        print(f"{i + 1}. llm_response - {response['llm_response']}")
        print(f"{i + 1}. gold_answer - {entry['answer']}")
        print(f"{i + 1}. time_taken - {time_taken}")

        responses.append(response)

    return responses
if __name__ == "__main__":

    # Load the 200-question benchmark set once, then run it end-to-end.
    ds = load_rag_benchmark_tester_dataset()

    # Supported Q4_K_M GGUF Dragon Models:
    # -- llmware/dragon-yi-6b-gguf
    # -- llmware/dragon-mistral-7b-gguf
    # -- llmware/dragon-llama-7b-gguf

    model_name = "llmware/dragon-yi-6b-gguf"

    output = run_test(model_name, ds)