import asyncio
from typing import Optional, List, Union

import aiohttp
import pytest
from openai import OpenAI, AsyncOpenAI
def response_header_check(response):
12
- assert if response headers < 4kb (nginx limit).
14
headers_size = sum(len(k) + len(v) for k, v in response.raw_headers)
15
assert headers_size < 4096, "Response headers exceed the 4kb limit"
18
async def generate_key(
22
"text-embedding-ada-002",
24
"fake-openai-endpoint-2",
27
url = "http://0.0.0.0:4000/key/generate"
28
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
34
async with session.post(url, headers=headers, json=data) as response:
35
status = response.status
36
response_text = await response.text()
42
raise Exception(f"Request did not return a 200 status code: {status}")
44
response_header_check(
48
return await response.json()
51
async def new_user(session):
52
url = "http://0.0.0.0:4000/user/new"
53
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
55
"models": ["gpt-4", "text-embedding-ada-002", "dall-e-2"],
59
async with session.post(url, headers=headers, json=data) as response:
60
status = response.status
61
response_text = await response.text()
67
raise Exception(f"Request did not return a 200 status code: {status}")
69
response_header_check(
72
return await response.json()
75
async def chat_completion(session, key, model: Union[str, List] = "gpt-4"):
76
url = "http://0.0.0.0:4000/chat/completions"
78
"Authorization": f"Bearer {key}",
79
"Content-Type": "application/json",
84
{"role": "system", "content": "You are a helpful assistant."},
85
{"role": "user", "content": "Hello!"},
89
async with session.post(url, headers=headers, json=data) as response:
90
status = response.status
91
response_text = await response.text()
97
raise Exception(f"Request did not return a 200 status code: {status}")
99
response_header_check(
103
return await response.json()
106
async def chat_completion_with_headers(session, key, model="gpt-4"):
107
url = "http://0.0.0.0:4000/chat/completions"
109
"Authorization": f"Bearer {key}",
110
"Content-Type": "application/json",
115
{"role": "system", "content": "You are a helpful assistant."},
116
{"role": "user", "content": "Hello!"},
120
async with session.post(url, headers=headers, json=data) as response:
121
status = response.status
122
response_text = await response.text()
128
raise Exception(f"Request did not return a 200 status code: {status}")
130
response_header_check(
134
raw_headers = response.raw_headers
135
raw_headers_json = {}
142
raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8")
144
return raw_headers_json
147
async def completion(session, key):
148
url = "http://0.0.0.0:4000/completions"
150
"Authorization": f"Bearer {key}",
151
"Content-Type": "application/json",
153
data = {"model": "gpt-4", "prompt": "Hello!"}
155
async with session.post(url, headers=headers, json=data) as response:
156
status = response.status
159
raise Exception(f"Request did not return a 200 status code: {status}")
161
response_header_check(
165
response = await response.json()
170
async def embeddings(session, key):
171
url = "http://0.0.0.0:4000/embeddings"
173
"Authorization": f"Bearer {key}",
174
"Content-Type": "application/json",
177
"model": "text-embedding-ada-002",
178
"input": ["hello world"],
181
async with session.post(url, headers=headers, json=data) as response:
182
status = response.status
183
response_text = await response.text()
188
raise Exception(f"Request did not return a 200 status code: {status}")
190
response_header_check(
195
async def image_generation(session, key):
196
url = "http://0.0.0.0:4000/images/generations"
198
"Authorization": f"Bearer {key}",
199
"Content-Type": "application/json",
203
"prompt": "A cute baby sea otter",
206
async with session.post(url, headers=headers, json=data) as response:
207
status = response.status
208
response_text = await response.text()
215
"Connection error" in response_text
218
raise Exception(f"Request did not return a 200 status code: {status}")
220
response_header_check(
226
async def test_chat_completion():
229
Make chat completion call
231
make chat completion call
233
async with aiohttp.ClientSession() as session:
234
key_gen = await generate_key(session=session)
236
await chat_completion(session=session, key=key)
237
key_gen = await new_user(session=session)
238
key_2 = key_gen["key"]
239
await chat_completion(session=session, key=key_2)
244
async def test_chat_completion_ratelimit():
246
- call model with rpm 1
247
- make 2 parallel calls
250
async with aiohttp.ClientSession() as session:
255
chat_completion(session=session, key=key, model="fake-openai-endpoint-2")
258
chat_completion(session=session, key=key, model="fake-openai-endpoint-2")
261
await asyncio.gather(*tasks)
262
pytest.fail("Expected at least 1 call to fail")
263
except Exception as e:
264
if "Request did not return a 200 status code: 429" in str(e):
267
pytest.fail(f"Wrong error received - {str(e)}")
271
async def test_chat_completion_different_deployments():
273
- call model group with 2 deployments
275
- expect 2 unique deployments
277
async with aiohttp.ClientSession() as session:
283
await chat_completion_with_headers(
284
session=session, key=key, model="fake-openai-endpoint-3"
288
print(f"results: {results}")
289
init_model_id = results[0]["x-litellm-model-id"]
290
deployments_shuffled = False
291
for result in results[1:]:
292
if init_model_id != result["x-litellm-model-id"]:
293
deployments_shuffled = True
294
if deployments_shuffled == False:
295
pytest.fail("Expected at least 1 shuffled call")
296
except Exception as e:
301
async def test_chat_completion_streaming():
303
[PROD Test] Ensures logprobs are returned correctly
305
client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
307
response = await client.chat.completions.create(
308
model="gpt-3.5-turbo-large",
309
messages=[{"role": "user", "content": "Hello!"}],
317
async for chunk in response:
318
response_str += chunk.choices[0].delta.content or ""
320
print(f"response_str: {response_str}")
324
async def test_chat_completion_old_key():
326
Production test for backwards compatibility. Test db against a pre-generated (old key)
328
Make chat completion call
330
async with aiohttp.ClientSession() as session:
332
key = "sk--W0Ph0uDZLVD7V7LQVrslg"
333
await chat_completion(session=session, key=key)
334
except Exception as e:
335
pytest.fail("Invalid api key")
339
async def test_completion():
342
Make chat completion call
344
make chat completion call
346
async with aiohttp.ClientSession() as session:
347
key_gen = await generate_key(session=session)
349
await completion(session=session, key=key)
350
key_gen = await new_user(session=session)
351
key_2 = key_gen["key"]
355
client = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")
357
client.completions.create(
359
prompt="Say this is a test",
366
async def test_embeddings():
373
async with aiohttp.ClientSession() as session:
374
key_gen = await generate_key(session=session)
376
await embeddings(session=session, key=key)
377
key_gen = await new_user(session=session)
378
key_2 = key_gen["key"]
379
await embeddings(session=session, key=key_2)
383
async def test_image_generation():
390
async with aiohttp.ClientSession() as session:
391
key_gen = await generate_key(session=session)
393
await image_generation(session=session, key=key)
394
key_gen = await new_user(session=session)
395
key_2 = key_gen["key"]
396
await image_generation(session=session, key=key_2)
400
async def test_openai_wildcard_chat_completion():
402
- Create key for model = "*" -> this has access to all models
403
- proxy_server_config.yaml has model = *
404
- Make chat completion call
407
async with aiohttp.ClientSession() as session:
408
key_gen = await generate_key(session=session, models=["*"])
412
await chat_completion(session=session, key=key, model="gpt-3.5-turbo-0125")
416
async def test_batch_chat_completions():
418
- Make chat completion call using
421
async with aiohttp.ClientSession() as session:
424
response = await chat_completion(
427
model="gpt-3.5-turbo,fake-openai-endpoint",
430
print(f"response: {response}")
432
assert len(response) == 2
433
assert isinstance(response, list)