text-generation-inference
101 строка · 3.1 Кб
1import pytest
2
3from text_generation_server.pb import generate_pb2
4from text_generation_server.models.causal_lm import CausalLMBatch
5from text_generation_server.models.santacoder import SantaCoder
6
7
@pytest.fixture(scope="session")
def default_santacoder():
    """Load the SantaCoder model once for the whole test session.

    Session scope avoids re-downloading/re-initializing the model for
    every test that uses it.
    """
    model = SantaCoder("bigcode/santacoder")
    return model
11
12
@pytest.fixture
def default_pb_request(default_pb_parameters, default_pb_stop_parameters):
    """Protobuf generation request asking the model to complete the prompt "def"."""
    request = generate_pb2.Request(
        id=0,
        inputs="def",
        prefill_logprobs=True,
        truncate=100,
        parameters=default_pb_parameters,
        stopping_parameters=default_pb_stop_parameters,
    )
    return request
23
24
@pytest.fixture
def default_pb_batch(default_pb_request):
    """Single-request protobuf batch wrapping the default completion request."""
    batch = generate_pb2.Batch(id=0, requests=[default_pb_request], size=1)
    return batch
28
29
@pytest.fixture
def default_fim_pb_request(default_pb_parameters, default_pb_stop_parameters):
    """Protobuf request using SantaCoder's fill-in-the-middle (FIM) prompt format."""
    fim_request = generate_pb2.Request(
        id=0,
        inputs="<fim-prefix>def<fim-suffix>world<fim-middle>",
        prefill_logprobs=True,
        truncate=100,
        parameters=default_pb_parameters,
        stopping_parameters=default_pb_stop_parameters,
    )
    return fim_request
40
41
@pytest.fixture
def default_fim_pb_batch(default_fim_pb_request):
    """Single-request protobuf batch wrapping the FIM request."""
    fim_batch = generate_pb2.Batch(id=0, requests=[default_fim_pb_request], size=1)
    return fim_batch
45
46
@pytest.mark.skip
def test_santacoder_generate_token_completion(default_santacoder, default_pb_batch):
    """Greedy completion of "def" stops exactly at max_new_tokens.

    Generates token by token; the batch must stay alive until the final
    step, which returns None and produces the expected completion text.
    """
    initial_batch = CausalLMBatch.from_pb(
        default_pb_batch,
        default_santacoder.tokenizer,
        default_santacoder.dtype,
        default_santacoder.device,
    )
    current_batch = initial_batch

    # All but the last step keep the (single-request) batch in flight.
    steps_before_last = initial_batch.stopping_criterias[0].max_new_tokens - 1
    for _ in range(steps_before_last):
        generations, current_batch, _ = default_santacoder.generate_token(
            current_batch
        )
        assert len(generations) == len(current_batch)

    # Final step: the request finishes and the batch is retired.
    generations, current_batch, _ = default_santacoder.generate_token(current_batch)
    assert current_batch is None

    assert len(generations) == 1
    final = generations[0]
    assert final.generated_text.text == " test_get_all_users_with_"
    assert final.request_id == initial_batch.requests[0].id
    assert (
        final.generated_text.generated_tokens
        == initial_batch.stopping_criterias[0].max_new_tokens
    )
71
72
@pytest.mark.skip
def test_fim_santacoder_generate_token_completion(
    default_santacoder, default_fim_pb_batch
):
    """Fill-in-the-middle prompt completes to the expected text at max_new_tokens.

    Same shape as the plain-completion test, but driven by the FIM-formatted
    prompt fixture.
    """
    initial_batch = CausalLMBatch.from_pb(
        default_fim_pb_batch,
        default_santacoder.tokenizer,
        default_santacoder.dtype,
        default_santacoder.device,
    )
    current_batch = initial_batch

    # All but the last step keep the (single-request) batch in flight.
    steps_before_last = initial_batch.stopping_criterias[0].max_new_tokens - 1
    for _ in range(steps_before_last):
        generations, current_batch, _ = default_santacoder.generate_token(
            current_batch
        )
        assert len(generations) == len(current_batch)

    # Final step: the request finishes and the batch is retired.
    generations, current_batch, _ = default_santacoder.generate_token(current_batch)
    assert current_batch is None

    assert len(generations) == 1
    final = generations[0]
    assert (
        final.generated_text.text
        == """ineProperty(exports, "__esModule", { value"""
    )
    assert final.request_id == initial_batch.requests[0].id
    assert (
        final.generated_text.generated_tokens
        == initial_batch.stopping_criterias[0].max_new_tokens
    )
102