llama-index

from typing import Any, Optional, Sequence

from llama_index.legacy.evaluation.base import BaseEvaluator, EvaluationResult
from llama_index.legacy.prompts.mixin import PromptDictType, PromptMixinType


class AnswerSimilarityEvaluator(BaseEvaluator):
    """Tonic Validate's answer similarity metric.

    The output score is a float between 0.0 and 5.0.

    See https://docs.tonic.ai/validate/ for more details.

    Args:
        openai_service (OpenAIService): The OpenAI service to use. Specifies the chat
            completion model to use as the LLM evaluator. Defaults to "gpt-4".
    """

    def __init__(self, openai_service: Optional[Any] = None):
        import_err_msg = (
            "`tonic-validate` package not found, please run `pip install "
            "tonic-validate`"
        )
        # Import lazily so the optional dependency is only required when
        # this evaluator is actually instantiated.
        try:
            from tonic_validate.metrics.answer_similarity_metric import (
                AnswerSimilarityMetric,
            )
            from tonic_validate.services.openai_service import OpenAIService
        except ImportError:
            raise ImportError(import_err_msg)

        if openai_service is None:
            openai_service = OpenAIService("gpt-4")
        self.openai_service = openai_service
        self.metric = AnswerSimilarityMetric()

    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        reference_response: Optional[str] = None,
        **kwargs: Any
    ) -> EvaluationResult:
        from tonic_validate.classes.benchmark import BenchmarkItem
        from tonic_validate.classes.llm_response import LLMResponse

        # Package the reference answer and the model output into
        # tonic-validate's data classes before scoring.
        benchmark_item = BenchmarkItem(question=query, answer=reference_response)

        llm_response = LLMResponse(
            llm_answer=response,
            llm_context_list=contexts,
            benchmark_item=benchmark_item,
        )

        score = self.metric.score(llm_response, self.openai_service)

        return EvaluationResult(
            query=query, contexts=contexts, response=response, score=score
        )

    def _get_prompts(self) -> PromptDictType:
        return {}

    def _get_prompt_modules(self) -> PromptMixinType:
        return {}

    def _update_prompts(self, prompts_dict: PromptDictType) -> None:
        return
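
For context, a minimal usage sketch of the evaluator defined above. The query, response, context, and reference strings are hypothetical placeholders; it assumes `tonic-validate` is installed and an OpenAI API key is configured in the environment.

# Minimal usage sketch (hypothetical example data, not part of the module above).
import asyncio

evaluator = AnswerSimilarityEvaluator()  # defaults to OpenAIService("gpt-4")

result = asyncio.run(
    evaluator.aevaluate(
        query="What is the capital of France?",
        response="Paris is the capital of France.",
        contexts=["France's capital city is Paris."],
        reference_response="The capital of France is Paris.",
    )
)
print(result.score)  # float between 0.0 and 5.0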
