1
from rayllm.backend.observability.metrics import NonExceptionThrowingCounter as Counter
2
from rayllm.backend.server.models import AviaryModelResponse
7
# NOTE(review): this view shows neither the enclosing `class`/`def` header nor
# the closing parenthesis of each `Counter(` call, and bare numeric lines are
# interleaved with the code -- presumably extraction residue; confirm against
# the real file before relying on this listing.
#
# `Counter` is the alias under which NonExceptionThrowingCounter is imported
# at the top of the file. Every counter below is declared with a single tag
# key, "model_id".

# Counter registered under the name "aviary_requests_started".
self.requests_started = Counter(
8
"aviary_requests_started",
9
description="Number of requests started.",
10
tag_keys=("model_id",),
12
# Counter registered under the name "aviary_requests_finished".
self.requests_finished = Counter(
13
"aviary_requests_finished",
14
description="Number of requests finished",
15
tag_keys=("model_id",),
17
# Counter registered under the name "aviary_requests_errored".
self.requests_errored = Counter(
18
"aviary_requests_errored",
19
description="Number of requests errored",
20
tag_keys=("model_id",),
23
# Counter registered under the name "aviary_tokens_generated".
self.tokens_generated = Counter(
24
"aviary_tokens_generated",
25
description="Number of tokens generated by Aviary",
26
tag_keys=("model_id",),
28
# Counter registered under the name "aviary_tokens_input".
self.tokens_input = Counter(
29
"aviary_tokens_input",
30
description="Number of tokens input by the user",
31
tag_keys=("model_id",),
34
# Update the counters from one response `res`, tagging every increment with
# the model identifier `model`.
#
# NOTE(review): indentation is lost in this view and bare numeric lines are
# interleaved with the code (presumably extraction residue), so the nesting
# of the statements below cannot be determined here -- confirm against the
# real file.
def track(self, res: AviaryModelResponse, is_first_token: bool, model: str):
35
# Tag dict passed to every `inc` call below; its only key is "model_id".
model_tags = {"model_id": model}
37
# Add this response's generated-token count when it is truthy (a None or 0
# count adds nothing).
if res.num_generated_tokens:
38
self.tokens_generated.inc(res.num_generated_tokens, tags=model_tags)
39
# Add the input-token count only when the caller flags this response as the
# first token and the count is truthy.
if is_first_token and res.num_input_tokens:
40
self.tokens_input.inc(res.num_input_tokens, tags=model_tags)
43
# NOTE(review): no condition guarding this increment is visible in this view;
# a line appears to be missing just above it. Verify which condition on `res`
# gates the errored counter before changing anything here.
self.requests_errored.inc(tags=model_tags)
45
# A response whose finish_reason is not None increments the finished counter.
if res.finish_reason is not None:
46
self.requests_finished.inc(tags=model_tags)