llama-index

eval_utils.py
78 строк · 2.2 Кб
Перенос по словам
1
"""Get evaluation utils.
2

3
NOTE: These are beta functions and might change.
4

5
"""
6

7
import asyncio
8
from collections import defaultdict
9
from typing import Any, List, Optional, Tuple
10

11
import numpy as np
12
import pandas as pd
13

14
from llama_index.legacy.async_utils import asyncio_module
15
from llama_index.legacy.core.base_query_engine import BaseQueryEngine
16
from llama_index.legacy.evaluation.base import EvaluationResult
17

18

19
async def aget_responses(
    questions: List[str], query_engine: BaseQueryEngine, show_progress: bool = False
) -> List[str]:
    """Send every question to the query engine and await all results together.

    Args:
        questions (List[str]): Questions to pass to ``query_engine.aquery``,
            one call per question.
        query_engine (BaseQueryEngine): Engine used to answer the questions.
        show_progress (bool): Forwarded to ``asyncio_module`` to select the
            module whose ``gather`` is awaited. Defaults to False.

    Returns:
        Whatever ``gather`` yields for the collected ``aquery`` awaitables.
    """
    # Create all awaitables up front; nothing is awaited until gather().
    pending = [query_engine.aquery(question) for question in questions]
    gather_module = asyncio_module(show_progress=show_progress)
    return await gather_module.gather(*pending)
28

29

30
def get_responses(
    *args: Any,
    **kwargs: Any,
) -> List[str]:
    """Blocking counterpart of ``aget_responses``.

    All positional and keyword arguments are forwarded unchanged to
    ``aget_responses``; the resulting coroutine is run to completion with
    ``asyncio.run`` and its result is returned.

    NOTE: ``asyncio.run`` cannot be used from a thread that already has a
    running event loop; await ``aget_responses`` directly in that case.
    """
    pending = aget_responses(*args, **kwargs)
    return asyncio.run(pending)
40

41

42
def get_results_df(
    eval_results_list: List[EvaluationResult], names: List[str], metric_keys: List[str]
) -> pd.DataFrame:
    """Build a DataFrame of mean scores, one row per evaluation run.

    Args:
        eval_results_list (List[EvaluationResult]):
            One entry per evaluation run. Each entry is indexed by metric key
            and must yield an iterable of results exposing a ``score``
            attribute (i.e. it is used as a mapping from metric key to a
            list of results).
        names (List[str]):
            Label of each run, in the same order as ``eval_results_list``;
            becomes the ``names`` column.
        metric_keys (List[str]):
            Metric keys to aggregate; each one becomes a column.

    Returns:
        pd.DataFrame: A ``names`` column followed by one column per metric
        key holding the mean score of each run.
    """
    # A plain dict with per-key assignment: the caller's ``names`` list is
    # never appended to, even if a metric key happens to be "names".
    columns = {"names": names}
    for metric_key in metric_keys:
        columns[metric_key] = [
            # A result without a score counts as 0.0; a None inside the array
            # would otherwise make ``.mean()`` raise a TypeError.
            np.array(
                [0.0 if r.score is None else r.score for r in run[metric_key]]
            ).mean()
            for run in eval_results_list
        ]
    return pd.DataFrame(columns)
63

64

65
def default_parser(eval_response: str) -> Tuple[Optional[float], Optional[str]]:
    """
    Default parser function for evaluation response.

    The first line of the response is parsed as the score; everything after
    the first newline is the reasoning, with leading newlines removed.

    Args:
        eval_response (str): The response string from the evaluation.

    Returns:
        Tuple[Optional[float], Optional[str]]: The score and the reasoning.
        The score is None when the first line is not a valid float; the
        reasoning is an empty string when nothing follows the first line.
    """
    # partition() never fails to unpack, unlike split("\n", 1) on a response
    # that contains no newline.
    score_str, _, reasoning_str = eval_response.partition("\n")
    try:
        score: Optional[float] = float(score_str)
    except ValueError:
        # Unparsable score: signal it with None, as the return type allows.
        score = None
    reasoning = reasoning_str.lstrip("\n")
    return score, reasoning
79
llama-index

Использование cookies