ollama
47 lines · 1.3 KB
1import json
2import requests
3
# NOTE: ollama must be running for this to work; start the ollama app or run `ollama serve`.
# TODO: update this for whatever model you wish to use
model = "llama2"
6
7
def chat(messages):
    """Send the conversation to the local ollama server and stream the reply.

    Prints each streamed token to stdout as it arrives.

    Parameters
    ----------
    messages : list[dict]
        Conversation history as ``{"role": ..., "content": ...}`` dicts.

    Returns
    -------
    dict
        The assistant message dict with the full concatenated content.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    Exception
        If the server reports an error inside the response stream.
    """
    r = requests.post(
        "http://0.0.0.0:11434/api/chat",
        json={"model": model, "messages": messages, "stream": True},
    )
    r.raise_for_status()

    output = ""
    # Fallback so the "done" branch (and the post-loop return) never hit a
    # NameError if the stream produces no message chunk before finishing.
    message = {"role": "assistant"}

    for line in r.iter_lines():
        if not line:
            # Skip keep-alive blank lines rather than crash json.loads.
            continue
        body = json.loads(line)
        if "error" in body:
            raise Exception(body["error"])
        if body.get("done") is False:
            # Default to {} (not ""): a missing key must still support .get().
            message = body.get("message", {})
            content = message.get("content", "")
            output += content
            # The response streams one token at a time; echo as we receive it.
            print(content, end="", flush=True)

        if body.get("done", False):
            message["content"] = output
            return message

    # Stream ended without a "done" marker: return what we accumulated
    # instead of silently returning None to the caller.
    message["content"] = output
    return message
30
31
def main():
    """Interactive chat loop: read prompts, stream replies, keep history.

    An empty prompt ends the session. The full conversation (user and
    assistant messages) is resent on every turn so the model has context.
    """
    messages = []

    while True:
        user_input = input("Enter a prompt: ")
        # `return` instead of exit(): exit() is a site-module convenience
        # that kills the interpreter; returning keeps main() importable
        # and lets the script terminate normally.
        if not user_input:
            return
        print()
        messages.append({"role": "user", "content": user_input})
        message = chat(messages)
        # Keep the assistant reply so follow-up prompts have full context.
        messages.append(message)
        print("\n\n")
44
45
# Entry point guard: start the interactive loop only when executed as a
# script, not when this module is imported.
if __name__ == "__main__":
    main()
48