# llama-index
# 78 lines · 2.2 KB
"""Evaluation utilities.

NOTE: These are beta functions and might change.

"""
6
7import asyncio8from collections import defaultdict9from typing import Any, List, Optional, Tuple10
11import numpy as np12import pandas as pd13
14from llama_index.legacy.async_utils import asyncio_module15from llama_index.legacy.core.base_query_engine import BaseQueryEngine16from llama_index.legacy.evaluation.base import EvaluationResult17
18
async def aget_responses(
    questions: List[str], query_engine: BaseQueryEngine, show_progress: bool = False
) -> List[str]:
    """Query the engine asynchronously with every question.

    Args:
        questions (List[str]):
            Questions to send to the query engine.
        query_engine (BaseQueryEngine):
            Engine whose ``aquery`` is called once per question.
        show_progress (bool):
            Forwarded to ``asyncio_module`` when picking the gather helper.

    Returns:
        The awaited result of gathering all ``aquery`` calls.

    """
    # Create every coroutine up front; nothing runs until they are gathered.
    pending = [query_engine.aquery(question) for question in questions]
    gatherer = asyncio_module(show_progress=show_progress)
    return await gatherer.gather(*pending)
29
def get_responses(
    *args: Any,
    **kwargs: Any,
) -> List[str]:
    """Get responses (blocking).

    Sync version of aget_responses: all positional and keyword arguments are
    passed through unchanged and the coroutine is driven to completion with
    ``asyncio.run``.

    """
    coroutine = aget_responses(*args, **kwargs)
    return asyncio.run(coroutine)
41
def get_results_df(
    eval_results_list: List[EvaluationResult], names: List[str], metric_keys: List[str]
) -> pd.DataFrame:
    """Build a DataFrame of mean scores per metric.

    Args:
        eval_results_list (List[EvaluationResult]):
            List of evaluation results. Each element is indexed by metric
            key and must yield an iterable of objects exposing ``score``.
        names (List[str]):
            Names of the evaluation results; stored in the "names" column.
        metric_keys (List[str]):
            List of metric keys to get; each becomes one column.

    Returns:
        pd.DataFrame: A "names" column plus, for every metric key, the mean
        score of each element of ``eval_results_list``.

    """
    table = defaultdict(list)
    table["names"] = names
    # Metric-major order: all runs for the first key, then the next key, ...
    pairs = ((key, run) for key in metric_keys for run in eval_results_list)
    for key, run in pairs:
        scores = np.array([item.score for item in run[key]])
        table[key].append(scores.mean())
    return pd.DataFrame(table)
64
def default_parser(eval_response: str) -> Tuple[Optional[float], Optional[str]]:
    """
    Default parser function for evaluation response.

    The score is read from the first line of the response and the reasoning
    from everything after the first newline.

    Args:
        eval_response (str): The response string from the evaluation.

    Returns:
        Tuple[Optional[float], Optional[str]]: A tuple containing the score as
            a float and the reasoning as a string. The score is None when the
            first line cannot be parsed as a float. Both values are None when
            the response is empty or contains only whitespace. The reasoning
            is an empty string when the response has a score line only.
    """
    if not eval_response or not eval_response.strip():
        # Nothing to parse; signal "no score, no reasoning" instead of raising.
        return None, None

    # partition never fails on a response without a newline, whereas
    # unpacking the result of split("\n", 1) raised ValueError in that case.
    score_str, _, reasoning_str = eval_response.partition("\n")

    try:
        score: Optional[float] = float(score_str)
    except ValueError:
        # First line is not numeric; keep the reasoning but report no score.
        score = None

    reasoning = reasoning_str.lstrip("\n")
    return score, reasoning