# What does this test?
## Tests /models and /model/* endpoints
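#
# These tests assume a LiteLLM proxy is already running locally at
# http://0.0.0.0:4000 with master key "sk-1234", that AZURE_API_KEY is set in
# the environment (loaded below via .env), and that pytest-asyncio is installed
# for the @pytest.mark.asyncio-marked coroutines. The proxy URL, master key,
# and Azure deployment details used below are assumptions about the test
# environment, not values provisioned by this file.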

import pytest
import asyncio
import aiohttp
import os
from dotenv import load_dotenv

load_dotenv()

async def generate_key(session, models=[]):
    url = "http://0.0.0.0:4000/key/generate"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "models": models,
        "duration": None,
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()


async def get_models(session, key):
    url = "http://0.0.0.0:4000/models"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.get(url, headers=headers) as response:
        status = response.status
        response_text = await response.text()
        print("response from /models")
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")


@pytest.mark.asyncio
async def test_get_models():
    async with aiohttp.ClientSession() as session:
        key_gen = await generate_key(session=session)
        key = key_gen["key"]
        await get_models(session=session, key=key)

async def add_models(session, model_id="123", model_name="azure-gpt-3.5"):
    url = "http://0.0.0.0:4000/model/new"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }

    data = {
        "model_name": model_name,
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": "os.environ/AZURE_API_KEY",
            "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
            "api_version": "2023-05-15",
        },
        "model_info": {"id": model_id},
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        print(f"Add models {response_text}")
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

        response_json = await response.json()
        return response_json


async def get_model_info(session, key):
    """
    Make sure only the models the user has access to are returned
    """
    url = "http://0.0.0.0:4000/model/info"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.get(url, headers=headers) as response:
        status = response.status
        response_text = await response.text()
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()


async def chat_completion(session, key, model="azure-gpt-3.5"):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

@pytest.mark.asyncio
async def test_get_models_key_restriction():
    """
    Get models the key/user has access to
    """
    async with aiohttp.ClientSession() as session:
        key_gen = await generate_key(session=session, models=["gpt-4"])
        key = key_gen["key"]
        response = await get_model_info(session=session, key=key)
        models = [m["model_name"] for m in response["data"]]
        for m in models:
            assert m == "gpt-4"

async def delete_model(session, model_id="123"):
    """
    Delete a model by id via /model/delete (requires the master key)
    """
    url = "http://0.0.0.0:4000/model/delete"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }
    data = {"id": model_id}

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        return await response.json()

@pytest.mark.asyncio
async def test_add_and_delete_models():
    """
    - Add model
    - Call new model -> expect to pass
    - Delete model
    - Call model -> expect to fail
    """
    import uuid

    async with aiohttp.ClientSession() as session:
        key_gen = await generate_key(session=session)
        key = key_gen["key"]
        model_id = f"12345_{uuid.uuid4()}"
        model_name = f"{uuid.uuid4()}"
        response = await add_models(
            session=session, model_id=model_id, model_name=model_name
        )
        assert response["model_id"] == model_id
        await asyncio.sleep(10)
        await chat_completion(session=session, key=key, model=model_name)
        await delete_model(session=session, model_id=model_id)
        try:
            await chat_completion(session=session, key=key, model=model_name)
            pytest.fail("Expected the call to fail after the model was deleted")
        except Exception:
            pass

async def add_model_for_health_checking(session, model_id="123"):
    url = "http://0.0.0.0:4000/model/new"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }

    data = {
        "model_name": f"azure-model-health-check-{model_id}",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
            "api_version": "2023-05-15",
        },
        "model_info": {"id": model_id},
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(f"Add models {response_text}")
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

async def get_model_info_v2(session, key):
    url = "http://0.0.0.0:4000/v2/model/info"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.get(url, headers=headers) as response:
        status = response.status
        response_text = await response.text()
        print("response from v2/model/info")
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

async def get_specific_model_info_v2(session, key, model_name):
    url = "http://0.0.0.0:4000/v2/model/info?debug=True&model=" + model_name
    print("running /v2/model/info check for model=", model_name)

    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.get(url, headers=headers) as response:
        status = response.status
        response_text = await response.text()
        print("response from v2/model/info")
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

        _json_response = await response.json()
        print("JSON response from /v2/model/info?model=", model_name, _json_response)

        _model_info = _json_response["data"]
        assert len(_model_info) == 1, f"Expected 1 model, got {len(_model_info)}"
        return _model_info[0]
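

# Note: test_add_model_run_health (below) reads an "openai_client" field from the
# entry returned by get_specific_model_info_v2. The assumption here is that
# /v2/model/info?debug=True adds such runtime/debug fields per deployment; only
# the fields actually asserted on below are relied upon.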


async def get_model_health(session, key, model_name):
    url = "http://0.0.0.0:4000/health?model=" + model_name
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.get(url, headers=headers) as response:
        status = response.status
        response_json = await response.json()
        print("response from /health?model=", model_name)
        print(response_json)
        print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
    return response_json
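

# Sketch of the /health response shape these tests rely on. This is an
# assumption based only on the fields asserted below ("healthy_endpoints",
# "healthy_count"); the real response may carry additional fields:
# {
#     "healthy_endpoints": [{"model": "azure/chatgpt-v-2", "api_base": "..."}],
#     "unhealthy_endpoints": [],
#     "healthy_count": 1,
#     "unhealthy_count": 0,
# }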


@pytest.mark.asyncio
async def test_add_model_run_health():
    """
    Add model
    Call /model/info and v2/model/info
    -> Admin UI calls v2/model/info
    Call /chat/completions
    Call /health
    -> Ensure the health check for the endpoint is working as expected
    """
    import uuid

    async with aiohttp.ClientSession() as session:
        key_gen = await generate_key(session=session)
        key = key_gen["key"]
        master_key = "sk-1234"
        model_id = str(uuid.uuid4())
        model_name = f"azure-model-health-check-{model_id}"
        print("adding model", model_name)
        await add_model_for_health_checking(session=session, model_id=model_id)
        _old_model_info = await get_specific_model_info_v2(
            session=session, key=key, model_name=model_name
        )
        print("model info before test", _old_model_info)

        await asyncio.sleep(30)
        print("calling /model/info")
        await get_model_info(session=session, key=key)
        print("calling v2/model/info")
        await get_model_info_v2(session=session, key=key)

        print("calling /chat/completions -> expect to work")
        await chat_completion(session=session, key=key, model=model_name)

        print("calling /health?model=", model_name)
        _health_info = await get_model_health(
            session=session, key=master_key, model_name=model_name
        )
        _healthy_endpoint = _health_info["healthy_endpoints"][0]

        assert _health_info["healthy_count"] == 1
        assert (
            _healthy_endpoint["model"] == "azure/chatgpt-v-2"
        )  # this is the model that got added

        # assert the underlying httpx/OpenAI client is unchanged

        await asyncio.sleep(10)

        _model_info_after_test = await get_specific_model_info_v2(
            session=session, key=key, model_name=model_name
        )

        print("model info after test", _model_info_after_test)
        old_openai_client = _old_model_info["openai_client"]
        new_openai_client = _model_info_after_test["openai_client"]
        print("old openai client", old_openai_client)
        print("new openai client", new_openai_client)

        """
        PROD TEST - This is extremely important
        The OpenAI client used should be the same after 30 seconds
        It is a serious bug if the OpenAI client does not match here
        """
        assert (
            old_openai_client == new_openai_client
        ), "OpenAI client does not match for the same model after 30 seconds"

        # cleanup
        await delete_model(session=session, model_id=model_id)
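

# To run these tests locally (assuming the proxy described at the top of this
# file is already running), something like the following should work:
#
#   pytest test_models.py -v -x
#
# -v prints each test name, -x stops on the first failure.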
