skypilot
# Starts a GUI server that connects to the Qwen OpenAI API server.
#
# Refer to llm/qwen/README.md for more details.
#
# Usage:
#
#  1. If you have an endpoint started on a cluster (sky launch):
#     `sky launch -c qwen-gui ./gui.yaml --env ENDPOINT=$(sky status --ip qwen):8000`
#  2. If you have a SkyPilot Service started (sky serve up) called qwen:
#     `sky launch -c qwen-gui ./gui.yaml --env ENDPOINT=$(sky serve status --endpoint qwen)`
#
# After the GUI server is started, you will see a gradio link in the output and
# you can click on it to open the GUI.
14
envs:
  # Address (host:port, no scheme) of the API server running qwen.
  # Override at launch time with `--env ENDPOINT=...`.
  ENDPOINT: "x.x.x.x:3031"
17
resources:
  # CPU-only request: this task only runs the GUI, which connects to the API
  # server at ENDPOINT.
  cpus: 2
20
setup: |
  # Reuse the `qwen` conda env when it can be activated; otherwise create it
  # with Python 3.10 and activate the fresh env.
  if ! conda activate qwen; then
    conda create -n qwen python=3.10 -y
    conda activate qwen
  fi

  # FastChat (with the model_worker and webui extras) plus a pre-1.0 `openai`
  # client.
  pip install "fschat[model_worker,webui]"
  pip install "openai<1"
30
run: |
  # Starts the FastChat controller in the background, then serves the gradio
  # web UI in the foreground, registered against the OpenAI-compatible API at
  # ${ENDPOINT}.
  conda activate qwen
  # NOTE(review): /sbin is kept on PATH as in the original script; nothing
  # below visibly needs it -- confirm before dropping.
  export PATH=$PATH:/sbin
  # Make the final `... | tee` pipeline return the gradio server's exit status
  # rather than tee's, so a crashed server is not reported as a successful job.
  set -o pipefail
  CONTROLLER_PORT=21001

  # Describe the remote model for the web UI. The heredoc delimiter is left
  # unquoted on purpose so the shell expands ${ENDPOINT}.
  cat <<EOF > ~/model_info.json
  {
    "Qwen/Qwen1.5-72B-Chat": {
      "model_name": "Qwen/Qwen1.5-72B-Chat",
      "api_base": "http://${ENDPOINT}/v1",
      "api_key": "empty",
      "model_path": "Qwen/Qwen1.5-72B-Chat"
    }
  }
  EOF

  # Controller output goes to ~/controller.log; it keeps running in the
  # background while the web server occupies the foreground.
  python3 -m fastchat.serve.controller --host 0.0.0.0 --port "${CONTROLLER_PORT}" > ~/controller.log 2>&1 &

  echo 'Starting gradio server...'
  # Pass the controller address explicitly so it always tracks CONTROLLER_PORT
  # instead of relying on the web server's built-in default.
  python -u -m fastchat.serve.gradio_web_server --share \
    --controller-url "http://localhost:${CONTROLLER_PORT}" \
    --register-openai-compatible-models ~/model_info.json | tee ~/gradio.log
54