llama-index

"""Evaluator."""

import asyncio
from abc import abstractmethod
from typing import Any, Optional, Sequence

from llama_index.legacy.bridge.pydantic import BaseModel, Field
from llama_index.legacy.core.response.schema import Response
from llama_index.legacy.prompts.mixin import PromptMixin, PromptMixinType


class EvaluationResult(BaseModel):
    """Evaluation result.

    Output of a BaseEvaluator.
    """

    query: Optional[str] = Field(None, description="Query string")
    contexts: Optional[Sequence[str]] = Field(None, description="Context strings")
    response: Optional[str] = Field(None, description="Response string")
    passing: Optional[bool] = Field(
        None, description="Binary evaluation result (passing or not)"
    )
    feedback: Optional[str] = Field(
        None, description="Feedback or reasoning for the response"
    )
    score: Optional[float] = Field(None, description="Score for the response")
    pairwise_source: Optional[str] = Field(
        None,
        description=(
            "Used only for pairwise and specifies whether it is from original order of"
            " presented answers or flipped order"
        ),
    )
    invalid_result: bool = Field(
        default=False, description="Whether the evaluation result is an invalid one."
    )
    invalid_reason: Optional[str] = Field(
        default=None, description="Reason for invalid evaluation."
    )


class BaseEvaluator(PromptMixin):
    """Base Evaluator class."""

    def _get_prompt_modules(self) -> PromptMixinType:
        """Get prompt modules."""
        return {}

    def evaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        """Run evaluation with query string, retrieved contexts,
        and generated response string.

        Subclasses can override this method to provide custom evaluation logic and
        take in additional arguments.
        """
        return asyncio.run(
            self.aevaluate(
                query=query,
                response=response,
                contexts=contexts,
                **kwargs,
            )
        )

    @abstractmethod
    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        """Run evaluation with query string, retrieved contexts,
        and generated response string.

        Subclasses must implement this method to provide their evaluation logic,
        and may take in additional arguments.
        """
        raise NotImplementedError

    def evaluate_response(
        self,
        query: Optional[str] = None,
        response: Optional[Response] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        """Run evaluation with query string and generated Response object.

        Subclasses can override this method to provide custom evaluation logic and
        take in additional arguments.
        """
        return asyncio.run(
            self.aevaluate_response(query=query, response=response, **kwargs)
        )

    async def aevaluate_response(
        self,
        query: Optional[str] = None,
        response: Optional[Response] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        """Run evaluation with query string and generated Response object.

        Subclasses can override this method to provide custom evaluation logic and
        take in additional arguments.
        """
        response_str: Optional[str] = None
        contexts: Optional[Sequence[str]] = None
        if response is not None:
            response_str = response.response
            contexts = [node.get_content() for node in response.source_nodes]

        return await self.aevaluate(
            query=query, response=response_str, contexts=contexts, **kwargs
        )


# legacy: backward compatibility
Evaluation = EvaluationResult
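
A minimal sketch of a concrete subclass, to show which hooks the base class expects. ExactMatchEvaluator below is hypothetical (it does not ship with llama-index); it passes a response when the query string appears in it verbatim.

# Illustrative only: a hypothetical evaluator subclass, not part of the module.
class ExactMatchEvaluator(BaseEvaluator):
    """Toy evaluator: passes if the response string contains the query."""

    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        if query is None or response is None:
            # Mark results we cannot score instead of raising.
            return EvaluationResult(
                invalid_result=True,
                invalid_reason="Both query and response are required.",
            )
        passing = query.lower() in response.lower()
        return EvaluationResult(
            query=query,
            response=response,
            contexts=contexts,
            passing=passing,
            score=1.0 if passing else 0.0,
            feedback="Query found in response." if passing else "No match found.",
        )


# Example call: the synchronous wrapper runs the async method via asyncio.run.
# result = ExactMatchEvaluator().evaluate(query="blue", response="The sky is blue.")
# assert result.passing and result.score == 1.0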