# Modified from:
# https://github.com/nyno-ai/openai-token-counter

from typing import Any, Callable, Dict, List, Optional

from llama_index.legacy.llms import ChatMessage, MessageRole
from llama_index.legacy.utils import get_tokenizer

class TokenCounter:
    """Count tokens in strings and estimate token usage of chat payloads.

    Attributes:
        tokenizer (Callable[[str], list]): Callable that splits a string into
            tokens. Defaults to the tokenizer returned by ``get_tokenizer()``.
    """

    def __init__(self, tokenizer: Optional[Callable[[str], list]] = None) -> None:
        """Initialize the counter.

        Args:
            tokenizer (Optional[Callable[[str], list]]): Tokenizer callable;
                falls back to ``get_tokenizer()`` when not provided.
        """
        self.tokenizer = tokenizer or get_tokenizer()

    def get_string_tokens(self, string: str) -> int:
        """Get the token count for a string.

        Args:
            string (str): The string to count.

        Returns:
            int: The token count.
        """
        return len(self.tokenizer(string))

    def estimate_tokens_in_messages(self, messages: List[ChatMessage]) -> int:
        """Estimate the total token count for a list of chat messages.

        Uses the OpenAI-style per-message accounting: tokens for role and
        content, plus fixed overheads for message framing and function calls.

        Args:
            messages (List[ChatMessage]): The messages to estimate the token
                count for.

        Returns:
            int: The estimated token count.
        """
        tokens = 0

        for message in messages:
            if message.role:
                tokens += self.get_string_tokens(message.role)

            if message.content:
                tokens += self.get_string_tokens(message.content)

            # Copy so popping "function_call" does not mutate the message.
            additional_kwargs = {**message.additional_kwargs}

            if "function_call" in additional_kwargs:
                function_call = additional_kwargs.pop("function_call")
                if function_call.get("name", None) is not None:
                    tokens += self.get_string_tokens(function_call["name"])

                if function_call.get("arguments", None) is not None:
                    tokens += self.get_string_tokens(function_call["arguments"])

                tokens += 3  # Additional tokens for function call

            tokens += 3  # Add three per message

            if message.role == MessageRole.FUNCTION:
                tokens -= 2  # Subtract 2 if role is "function"

        return tokens

    def estimate_tokens_in_functions(self, functions: List[Dict[str, Any]]) -> int:
        """Estimate the token count for a list of function definitions.

        We take here a list of functions created using the `to_openai_spec`
        function (or similar).

        Args:
            functions (List[Dict[str, Any]]): The functions to estimate the
                token count for.

        Returns:
            int: The estimated token count.
        """
        prompt_definition = str(functions)
        tokens = self.get_string_tokens(prompt_definition)
        tokens += 9  # Additional tokens for function definition
        return tokens