skypilot

serve-7b.yaml

40 строк · 964.0 Байт

Перенос по словам

1
envs:
  # Model identifier; referenced as $MODEL_NAME by the readiness probe
  # payload and by the server launch command in `run`.
  MODEL_NAME: 'Qwen/Qwen1.5-7B-Chat'
3

4
service:
  # Number of replicas to manage.
  replicas: 2
  # Replica readiness check: POST a minimal (one-token) chat completion
  # request to the endpoint path below.
  readiness_probe:
    path: /v1/chat/completions
    # 1200 seconds = 20 minutes of initial delay for the probe.
    # NOTE(review): exact grace-period semantics are defined by the SkyServe
    # service spec - confirm there before changing this value.
    initial_delay_seconds: 1200
    post_data:
      model: "$MODEL_NAME"
      max_tokens: 1
      messages:
        - role: user
          content: 'Hello! What is your name?'
17
  
18

19
resources:
  # Port opened on the replica.
  ports: 8000
  disk_tier: best
  # Set of candidate GPU types, written as a flow mapping with empty values.
  # NOTE(review): presumably any one of the listed types may be provisioned -
  # confirm against the SkyPilot task YAML documentation.
  accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
23

24
setup: |
  # Reuse the `qwen` conda environment when it can be activated; otherwise
  # create it with Python 3.10 and activate the new environment.
  if ! conda activate qwen; then
    conda create -n qwen python=3.10 -y
    conda activate qwen
  fi
  # Install the pinned serving stack into the active environment.
  pip install --upgrade vllm==0.3.2
  pip install --upgrade transformers==4.38.0
32

33
run: |
  # Without pipefail the pipeline below would report the exit status of
  # `tee`, so a crashed server would look like a successful run.
  set -o pipefail
  conda activate qwen
  # NOTE(review): /sbin is appended presumably so a system tool living there
  # can be found by the server - confirm which one is needed.
  export PATH="$PATH:/sbin"
  # Launch the vLLM OpenAI-compatible API server for $MODEL_NAME, sharding
  # the model across every GPU on the node. The port is spelled out so it
  # visibly matches `resources.ports`. Output is mirrored to a log file in
  # the home directory.
  python -m vllm.entrypoints.openai.api_server \
    --host 0.0.0.0 \
    --port 8000 \
    --model "$MODEL_NAME" \
    --tensor-parallel-size "$SKYPILOT_NUM_GPUS_PER_NODE" \
    --max-model-len 1024 | tee ~/openai_api_server.log
41

skypilot

Использование cookies