llama-index
/
augmentation_precision.py
68 lines · 2.2 KB
import asyncio
from typing import Any, Optional, Sequence

from llama_index.legacy.evaluation.base import BaseEvaluator, EvaluationResult
from llama_index.legacy.prompts.mixin import PromptDictType, PromptMixinType
6
class AugmentationPrecisionEvaluator(BaseEvaluator):
    """Tonic Validate's augmentation precision metric.

    The output score is a float between 0.0 and 1.0.

    See https://docs.tonic.ai/validate/ for more details.

    Args:
        openai_service(OpenAIService): The OpenAI service to use. Specifies the chat
            completion model to use as the LLM evaluator. Defaults to "gpt-4".
    """

    def __init__(self, openai_service: Optional[Any] = None):
        import_err_msg = (
            "`tonic-validate` package not found, please run `pip install "
            "tonic-validate`"
        )
        try:
            from tonic_validate.metrics.augmentation_precision_metric import (
                AugmentationPrecisionMetric,
            )
            from tonic_validate.services.openai_service import OpenAIService
        except ImportError as e:
            # Chain the original exception so the root cause stays visible
            # in the traceback.
            raise ImportError(import_err_msg) from e

        if openai_service is None:
            openai_service = OpenAIService("gpt-4")
        self.openai_service = openai_service
        self.metric = AugmentationPrecisionMetric()

    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        """Score how precisely the retrieved contexts support the response.

        Args:
            query: The user question that was asked.
            response: The LLM answer being evaluated.
            contexts: The retrieved context strings used to produce the answer.

        Returns:
            EvaluationResult carrying back the inputs plus ``score``, the
            augmentation precision (a float between 0.0 and 1.0).
        """
        from tonic_validate.classes.benchmark import BenchmarkItem
        from tonic_validate.classes.llm_response import LLMResponse

        benchmark_item = BenchmarkItem(question=query)

        llm_response = LLMResponse(
            llm_answer=response,
            llm_context_list=contexts,
            benchmark_item=benchmark_item,
        )

        # `metric.score` performs a synchronous OpenAI call; run it in a
        # worker thread so this coroutine does not block the event loop.
        loop = asyncio.get_running_loop()
        score = await loop.run_in_executor(
            None, self.metric.score, llm_response, self.openai_service
        )

        return EvaluationResult(
            query=query, contexts=contexts, response=response, score=score
        )

    def _get_prompts(self) -> PromptDictType:
        """This evaluator exposes no prompts."""
        return {}

    def _get_prompt_modules(self) -> PromptMixinType:
        """This evaluator exposes no prompt sub-modules."""
        return {}

    def _update_prompts(self, prompts_dict: PromptDictType) -> None:
        """No-op: this evaluator has no prompts to update."""
        return