1
import { ModelUsageUnit } from "@/src/constants";
2
import { tokenCount } from "@/src/features/ingest/lib/usage";
4
describe("Token Count Functions", () => {
5
// Helper that builds the model fixture passed as `model` to tokenCount in the tests of this file.
// Fields visible here: tokenizerId is taken from `tokenizer`, tokenizerModel from `model`,
// createdAt/updatedAt are set to the current time, and unit is ModelUsageUnit.Tokens.
// NOTE(review): several lines of this object literal (and its closing braces) are not
// visible in this excerpt, so the complete field list cannot be confirmed from here.
const generateModel = (model: string, tokenizer: string) => {
9
tokenizerId: tokenizer,
13
tokenizerModel: model,
15
createdAt: new Date(),
16
updatedAt: new Date(),
23
unit: ModelUsageUnit.Tokens,
27
describe("token count for strings", () => {
29
// Case table for plain-string input: each row names a model, the tokenizer id to put on
// the fixture, and the token count expected for the Lorem Ipsum paragraph used below.
// The rows shown expect 114 for the "openai" tokenizer and 118 for the "claude" tokenizer.
{ model: "gpt-3.5-turbo", tokenizer: "openai", tokens: 114 },
30
{ model: "text-embedding-ada-002", tokenizer: "openai", tokens: 114 },
31
{ model: "gpt-4-1106-preview", tokenizer: "openai", tokens: 114 },
32
{ model: "gpt-4-vision-preview", tokenizer: "openai", tokens: 114 },
33
{ model: "claude", tokenizer: "claude", tokens: 118 },
34
{ model: "claude-instant-1.2", tokenizer: "claude", tokens: 118 },
35
{ model: "gpt-3.5-turbo-1106", tokenizer: "openai", tokens: 114 },
36
].forEach(({ model, tokens, tokenizer }) => {
37
// One test is generated per row; the title embeds the expected count and the model name.
it(`should return token count ${tokens} for ${model}`, () => {
38
const result = tokenCount({
39
model: generateModel(model, tokenizer),
40
text: "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.",
42
// NOTE(review): the toBe(tokens) check below already fails when result is undefined,
// so this toBeDefined check adds no extra coverage.
expect(result).toBeDefined();
43
expect(result).toBe(tokens);
47
// Builds a fixture with the placeholder names "unknown-model" / "unknown-tokenizer"
// and expects tokenCount to return undefined for the short text "Hello, World!".
it("should return undefined for unknown model", () => {
48
const result = tokenCount({
49
model: generateModel("unknown-model", "unknown-tokenizer"),
50
text: "Hello, World!",
52
expect(result).toBeUndefined();
55
// Counts tokens for a larger input using a gpt-3.5-turbo / "openai" fixture and pins the
// result to 155. Only fragments of the input are visible in this excerpt (two timestamps,
// one greeting string, two identical `content` strings), so its overall shape cannot be
// confirmed from here.
// NOTE(review): the first timestamp ("2024-01-00:00:00.488Z") does not have the same
// date/time layout as the second ("2024-01-24T10:00:00.929Z"). It is part of the input
// being counted, so presumably changing it would also change the expected 155 — confirm
// before touching it.
it("check extensive openai chat message", () => {
56
const result = tokenCount({
57
model: generateModel("gpt-3.5-turbo", "openai"),
69
timestamp: "2024-01-00:00:00.488Z",
75
"Hey Simon! 😊 How's your day going? Have you been up to anything interesting lately?",
77
timestamp: "2024-01-24T10:00:00.929Z",
87
content: "This is some content",
92
content: "This is some content",
96
expect(result).toBe(155);
99
// Expects tokenCount to return 2 for a gpt-4 / "openai" fixture.
// NOTE(review): the `text` argument is on a line not visible in this excerpt, so which
// "invalid" value yields 2 cannot be confirmed here; the title also does not say what
// value should be returned.
it("should return for invalid text type", () => {
100
const result = tokenCount({
101
model: generateModel("gpt-4", "openai"),
104
expect(result).toBe(2);
107
// Expects a count of 0 for a gpt-4 / "openai" fixture. Per the title the input is an
// empty string; the `text` line itself is not visible in this excerpt.
it("should return correct token count for empty string", () => {
108
const result = tokenCount({
109
model: generateModel("gpt-4", "openai"),
112
expect(result).toBe(0);
115
// Builds a 10000-character string of "A" and expects a count of 1250 for a
// gpt-4 / "openai" fixture. The line that passes longString to tokenCount is not
// visible in this excerpt; presumably it is supplied as `text`.
it("should return correct token count for very long string", () => {
116
const longString = "A".repeat(10000);
117
const result = tokenCount({
118
model: generateModel("gpt-4", "openai"),
121
// NOTE(review): redundant with the toBe(1250) check below, which already fails on undefined.
expect(result).toBeDefined();
122
expect(result).toBe(1250);
125
// Expects tokenCount to return undefined for a gpt-4 / "openai" fixture. Per the title
// the input is null; the `text` line itself is not visible in this excerpt.
it("should return undefined for null text input", () => {
126
const result = tokenCount({
127
model: generateModel("gpt-4", "openai"),
130
expect(result).toBeUndefined();
132
// Expects tokenCount to return undefined for a gpt-4 / "openai" fixture. Per the title
// the input is undefined; the `text` line itself is not visible in this excerpt.
it("should return undefined for undefined text input", () => {
133
const result = tokenCount({
134
model: generateModel("gpt-4", "openai"),
137
expect(result).toBeUndefined();
141
describe("token count for chat messages", () => {
143
// Case table for chat-message input: each row names a model, the tokenizer id to put on
// the fixture, and the token count expected for the short World Series conversation below.
// The rows shown expect 44 for the "openai" tokenizer and 48 for the "claude" tokenizer.
// NOTE(review): the "gpt-3.5-turbo-16k-0613" row appears twice with identical values, so
// two generated tests share the same title — possibly a different model was intended
// for one of them; confirm.
{ model: "gpt-4", tokenizer: "openai", tokens: 44 },
144
{ model: "gpt-3.5-turbo-16k-0613", tokenizer: "openai", tokens: 44 },
145
{ model: "gpt-3.5-turbo-16k-0613", tokenizer: "openai", tokens: 44 },
146
{ model: "claude-instant-1.2", tokenizer: "claude", tokens: 48 },
147
].forEach(({ model, tokens, tokenizer }) => {
148
// One test is generated per row; the title embeds the expected count and the model name.
it(`should return token count ${tokens} for ${model}`, () => {
149
const result = tokenCount({
150
model: generateModel(model, tokenizer),
152
{ role: "system", content: "You are a helpful assistant." },
153
{ role: "user", content: "Who won the world series in 2020?" },
156
content: "The Los Angeles Dodgers won the World Series in 2020.",
160
// NOTE(review): redundant with the toBe(tokens) check below, which already fails on undefined.
expect(result).toBeDefined();
161
expect(result).toBe(tokens);
165
// Passes a single object (not an array of messages) as `text` and pins the count to 7
// for a gpt-4 / "openai" fixture.
// NOTE(review): the spelling "Helo world" is part of the input being counted, so
// presumably "correcting" it would change the expected 7 — leave as is unless the
// expectation is updated too.
it("should return for non array", () => {
166
const result = tokenCount({
167
model: generateModel("gpt-4", "openai"),
168
text: { role: "Helo world" },
170
expect(result).toBe(7);
173
// Expects tokenCount to return undefined for a gpt-4 / "openai" fixture. Per the title
// the input is an empty array; the `text` line itself is not visible in this excerpt.
it("should return for empty array", () => {
174
const result = tokenCount({
175
model: generateModel("gpt-4", "openai"),
178
expect(result).toBeUndefined();
181
// Passes a one-element array whose object has only a `role` key (no `content`) and pins
// the count to 9 for a gpt-4 / "openai" fixture.
// NOTE(review): the spelling "Helo world" is part of the input being counted, so
// presumably "correcting" it would change the expected 9.
it("should return for array of invalid object", () => {
182
const result = tokenCount({
183
model: generateModel("gpt-4", "openai"),
184
text: [{ role: "Helo world" }],
186
expect(result).toBe(9);