# llama-index
#
# augmentation_accuracy.py
# 68 lines · 2.2 KB
# (viewer option: word wrap)
1
from typing import Any, Optional, Sequence
2

3
from llama_index.legacy.evaluation.base import BaseEvaluator, EvaluationResult
4
from llama_index.legacy.prompts.mixin import PromptDictType, PromptMixinType
5

6

7
class AugmentationAccuracyEvaluator(BaseEvaluator):
    """Tonic Validate's augmentation accuracy metric.

    The output score is a float between 0.0 and 1.0.

    See https://docs.tonic.ai/validate/ for more details.

    Args:
        openai_service(OpenAIService): The OpenAI service to use. Specifies the chat
            completion model to use as the LLM evaluator. Defaults to "gpt-4".

    Raises:
        ImportError: If the `tonic-validate` package cannot be imported.
    """

    def __init__(self, openai_service: Optional[Any] = None):
        """Import the Tonic Validate pieces lazily and set up the metric.

        The `tonic_validate` imports live inside the constructor so that the
        package is only required when this evaluator is actually instantiated.
        """
        import_err_msg = (
            "`tonic-validate` package not found, please run `pip install "
            "tonic-validate`"
        )
        try:
            from tonic_validate.metrics.augmentation_accuracy_metric import (
                AugmentationAccuracyMetric,
            )
            from tonic_validate.services.openai_service import OpenAIService
        except ImportError as err:
            # Chain the underlying failure so the traceback shows which import
            # actually broke instead of an unrelated-looking secondary error.
            raise ImportError(import_err_msg) from err

        if openai_service is None:
            # No service supplied by the caller: fall back to a "gpt-4" service.
            openai_service = OpenAIService("gpt-4")
        self.openai_service = openai_service
        self.metric = AugmentationAccuracyMetric()

    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any
    ) -> EvaluationResult:
        """Score a single query/response/contexts triple with the metric.

        Args:
            query: The question; wrapped in a Tonic Validate `BenchmarkItem`.
            response: The LLM answer being evaluated.
            contexts: The context strings passed along as the response's
                context list.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            An `EvaluationResult` carrying the inputs and the metric's score.
        """
        from tonic_validate.classes.benchmark import BenchmarkItem
        from tonic_validate.classes.llm_response import LLMResponse

        benchmark_item = BenchmarkItem(question=query)

        llm_response = LLMResponse(
            llm_answer=response,
            llm_context_list=contexts,
            benchmark_item=benchmark_item,
        )

        # NOTE(review): `score` is invoked synchronously (no await) inside this
        # coroutine; if it performs network I/O it will hold the event loop
        # for the duration of the call — confirm this is acceptable.
        score = self.metric.score(llm_response, self.openai_service)

        return EvaluationResult(
            query=query, contexts=contexts, response=response, score=score
        )

    def _get_prompts(self) -> PromptDictType:
        """Return an empty mapping: this evaluator exposes no prompts."""
        return {}

    def _get_prompt_modules(self) -> PromptMixinType:
        """Return an empty mapping: this evaluator has no prompt sub-modules."""
        return {}

    def _update_prompts(self, prompts_dict: PromptDictType) -> None:
        """Do nothing: there are no prompts to update."""
        return
69
# llama-index
#
# (page footer: cookie usage notice)