langfuse

Форк
0
/
usage.servertest.ts 
189 строк · 6.2 Кб
1
import { ModelUsageUnit } from "@/src/constants";
2
import { tokenCount } from "@/src/features/ingest/lib/usage";
3

4
/**
 * Test suite for `tokenCount`.
 *
 * Exercises plain-string input, chat-message arrays and malformed input
 * against model fixtures that use the "openai" and "claude" tokenizer ids.
 * Expected counts are pinned values; any change to the fixture payloads below
 * (including their typos) would invalidate them.
 */
describe("Token Count Functions", () => {
  /**
   * Builds a model fixture for `tokenCount`.
   *
   * @param model - Used both as `modelName` and as `tokenizerConfig.tokenizerModel`.
   * @param tokenizer - Stored as `tokenizerId`.
   * @returns A model-shaped object with fixed `tokensPerMessage` / `tokensPerName`
   *   values, all pricing fields set to `null` and `unit` set to tokens.
   */
  const generateModel = (model: string, tokenizer: string) => {
    return {
      id: "1",
      modelName: model,
      tokenizerId: tokenizer,
      tokenizerConfig: {
        tokensPerMessage: 3,
        tokensPerName: 1,
        tokenizerModel: model,
      },
      createdAt: new Date(),
      updatedAt: new Date(),
      matchPattern: "",
      projectId: null,
      startDate: null,
      inputPrice: null,
      outputPrice: null,
      totalPrice: null,
      unit: ModelUsageUnit.Tokens,
    };
  };

  // Shared sample paragraph for the plain-string cases. The expected token
  // counts in the table below are tied to this exact text.
  const LOREM_IPSUM =
    "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.";

  describe("token count for strings", () => {
    // One test is registered per row: model name, tokenizer id, expected count.
    const stringCases = [
      { model: "gpt-3.5-turbo", tokenizer: "openai", tokens: 114 },
      { model: "text-embedding-ada-002", tokenizer: "openai", tokens: 114 },
      { model: "gpt-4-1106-preview", tokenizer: "openai", tokens: 114 },
      { model: "gpt-4-vision-preview", tokenizer: "openai", tokens: 114 },
      { model: "claude", tokenizer: "claude", tokens: 118 },
      { model: "claude-instant-1.2", tokenizer: "claude", tokens: 118 },
      { model: "gpt-3.5-turbo-1106", tokenizer: "openai", tokens: 114 },
    ];

    for (const { model, tokenizer, tokens } of stringCases) {
      it(`should return token count ${tokens} for ${model}`, () => {
        const result = tokenCount({
          model: generateModel(model, tokenizer),
          text: LOREM_IPSUM,
        });
        expect(result).toBeDefined();
        expect(result).toBe(tokens);
      });
    }

    it("should return undefined for unknown model", () => {
      const result = tokenCount({
        model: generateModel("unknown-model", "unknown-tokenizer"),
        text: "Hello, World!",
      });
      expect(result).toBeUndefined();
    });

    it("check extensive openai chat message", () => {
      // Messages deliberately carry extra keys (id, timestamp, isPersisted)
      // and one non-string `content` to cover loosely shaped payloads.
      const result = tokenCount({
        model: generateModel("gpt-3.5-turbo", "openai"),
        text: [
          {
            role: "system",
            content: "some test",
            id: "some-id",
            isPersisted: true,
          },
          {
            id: "some-id",
            content: "some test",
            role: "user",
            // NOTE(review): not a well-formed ISO timestamp. Left untouched
            // because the expected count below was presumably computed from
            // this exact payload - confirm before correcting.
            timestamp: "2024-01-00:00:00.488Z",
            isPersisted: true,
          },
          {
            id: "some id",
            content:
              "Hey Simon! 😊 How's your day going? Have you been up to anything interesting lately?",
            role: "user",
            timestamp: "2024-01-24T10:00:00.929Z",
            isPersisted: true,
          },
          {
            // Boolean content: a non-string message body.
            content: true,
            role: "user",
            id: "some id",
          },
          {
            role: "system",
            content: "This is some content",
          },
          {
            id: "another id",
            role: "assistant",
            content: "This is some content",
          },
        ],
      });
      expect(result).toBe(155);
    });

    it("should return for invalid text type", () => {
      // A bare number is neither a string nor a message array; a count is
      // still expected rather than undefined.
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: 1234,
      });
      expect(result).toBe(2);
    });

    it("should return correct token count for empty string", () => {
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: "",
      });
      expect(result).toBe(0);
    });

    it("should return correct token count for very long string", () => {
      const longString = "A".repeat(10000);
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: longString,
      });
      expect(result).toBeDefined();
      expect(result).toBe(1250);
    });

    it("should return undefined for null text input", () => {
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: null,
      });
      expect(result).toBeUndefined();
    });

    it("should return undefined for undefined text input", () => {
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: undefined,
      });
      expect(result).toBeUndefined();
    });
  });

  describe("token count for chat messages", () => {
    // Each model appears exactly once: a repeated row would register two
    // tests with the same title and add no coverage.
    const chatCases = [
      { model: "gpt-4", tokenizer: "openai", tokens: 44 },
      { model: "gpt-3.5-turbo-16k-0613", tokenizer: "openai", tokens: 44 },
      { model: "claude-instant-1.2", tokenizer: "claude", tokens: 48 },
    ];

    for (const { model, tokenizer, tokens } of chatCases) {
      it(`should return token count ${tokens} for ${model}`, () => {
        const result = tokenCount({
          model: generateModel(model, tokenizer),
          text: [
            { role: "system", content: "You are a helpful assistant." },
            { role: "user", content: "Who won the world series in 2020?" },
            {
              role: "assistant",
              content: "The Los Angeles Dodgers won the World Series in 2020.",
            },
          ],
        });
        expect(result).toBeDefined();
        expect(result).toBe(tokens);
      });
    }

    it("should return for non array", () => {
      // A single object instead of an array of messages. The "Helo" typo is
      // part of the fixture; the expected count depends on it.
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: { role: "Helo world" },
      });
      expect(result).toBe(7);
    });

    it("should return for empty array", () => {
      // An empty message list is expected to produce no count at all.
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: [],
      });
      expect(result).toBeUndefined();
    });

    it("should return for array of invalid object", () => {
      // The message has a role but no `content` key.
      const result = tokenCount({
        model: generateModel("gpt-4", "openai"),
        text: [{ role: "Helo world" }],
      });
      expect(result).toBe(9);
    });
  });
});
190

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.