GenerativeAIExamples
233 lines · 10.2 KB
1# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import json17import logging18import os19import statistics20
21from datasets import Dataset22from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings23from ragas import evaluate24from ragas.llms import LangchainLLMWrapper25from ragas.embeddings import LangchainEmbeddingsWrapper26from ragas.metrics import (27answer_relevancy,28context_precision,29context_recall,30context_relevancy,31faithfulness,32answer_similarity
33)
34
# Few-shot judge prompt in the Llama-2 chat format ("<s>[INST] <<SYS>> ... [/INST]").
# Two worked examples (a 5-rated and a 1-rated answer) show the judge the exact
# output contract: a JSON object with "Rating" (1-5) first and "Explanation" second.
# Placeholders filled via str.format: {system_prompt}, {question}, {ctx_ref},
# {answer_ref}, {answer}.
LLAMA_PROMPT_TEMPLATE = (
    "<s>[INST] <<SYS>>"
    "{system_prompt}"
    "<</SYS>>"
    ""
    "Example 1:"
    "[Question]"
    "When did Queen Elizabeth II die?"
    "[The Start of the Reference Context]"
    """On 8 September 2022, Buckingham Palace released a statement which read: "Following further evaluation this morning, the Queen's doctors are concerned for Her Majesty's health and have recommended she remain under medical supervision. The Queen remains comfortable and at Balmoral."[257][258] Her immediate family rushed to Balmoral to be by her side.[259][260] She died peacefully at 15:10 BST at the age of 96, with two of her children, Charles and Anne, by her side;[261][262] Charles immediately succeeded as monarch. Her death was announced to the public at 18:30,[263][264] setting in motion Operation London Bridge and, because she died in Scotland, Operation Unicorn.[265][266] Elizabeth was the first monarch to die in Scotland since James V in 1542.[267] Her death certificate recorded her cause of death as old age"""
    "[The End of Reference Context]"
    "[The Start of the Reference Answer]"
    "Queen Elizabeth II died on September 8, 2022."
    "[The End of Reference Answer]"
    "[The Start of the Assistant's Answer]"
    "She died on September 8, 2022"
    "[The End of Assistant's Answer]"
    '"Rating": 5, "Explanation": "The answer is helpful, relevant, accurate, and concise. It matches the information provided in the reference context and answer."'
    ""
    "Example 2:"
    "[Question]"
    "When did Queen Elizabeth II die?"
    "[The Start of the Reference Context]"
    """On 8 September 2022, Buckingham Palace released a statement which read: "Following further evaluation this morning, the Queen's doctors are concerned for Her Majesty's health and have recommended she remain under medical supervision. 
The Queen remains comfortable and at Balmoral."[257][258] Her immediate family rushed to Balmoral to be by her side.[259][260] She died peacefully at 15:10 BST at the age of 96, with two of her children, Charles and Anne, by her side;[261][262] Charles immediately succeeded as monarch. Her death was announced to the public at 18:30,[263][264] setting in motion Operation London Bridge and, because she died in Scotland, Operation Unicorn.[265][266] Elizabeth was the first monarch to die in Scotland since James V in 1542.[267] Her death certificate recorded her cause of death as old age"""
    "[The End of Reference Context]"
    "[The Start of the Reference Answer]"
    "Queen Elizabeth II died on September 8, 2022."
    "[The End of Reference Answer]"
    "[The Start of the Assistant's Answer]"
    "Queen Elizabeth II was the longest reigning monarch of the United Kingdom and the Commonwealth."
    "[The End of Assistant's Answer]"
    '"Rating": 1, "Explanation": "The answer is not helpful or relevant. It does not answer the question and instead goes off topic."'
    ""
    "Follow the exact same format as above. Put Rating first and Explanation second. Rating must be between 1 and 5. What is the rating and explanation for the following assistant's answer"
    "Rating and Explanation should be in JSON format"
    "[Question]"
    "{question}"
    "[The Start of the Reference Context]"
    "{ctx_ref}"
    "[The End of Reference Context]"
    "[The Start of the Reference Answer]"
    "{answer_ref}"
    "[The End of Reference Answer]"
    "[The Start of the Assistant's Answer]"
    "{answer}"
    "[The End of Assistant's Answer][/INST]"
)

# System prompt injected into {system_prompt} above.
# NOTE(review): "You evaluation" and "helpfullness" are typos in the runtime
# prompt text; kept byte-identical here since changing them alters the prompt
# sent to the judge model -- confirm and fix deliberately if desired.
SYS_PROMPT = """
You are an impartial judge that evaluates the quality of an assistant's answer to the question provided.
You evaluation takes into account helpfullness, relevancy, accuracy, and level of detail of the answer.
You must use both the reference context and reference answer to guide your evaluation.
"""
87
# Configure root logging at INFO so the script is usable standalone;
# module-level logger follows the stdlib getLogger(__name__) convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def calculate_ragas_score(row):
    """Collapse four per-row RAGAS metrics into one harmonic-mean score.

    Args:
        row: A pandas Series (one row of the RAGAS results DataFrame) that
            contains 'faithfulness', 'context_relevancy', 'answer_relevancy'
            and 'context_recall' entries.

    Returns:
        The harmonic mean of the four metric values.
    """
    metric_names = ('faithfulness', 'context_relevancy', 'answer_relevancy', 'context_recall')
    metric_values = [row[name] for name in metric_names]
    return statistics.harmonic_mean(metric_values)
def eval_ragas(ev_file_path, ev_result_path, llm_model='ai-mixtral-8x7b-instruct'):
    """Score RAG outputs with the RAGAS metric suite and persist the results.

    Loads a JSON evaluation file (a list of entries with "question",
    "generated_answer", "retrieved_context" and "ground_truth_answer" keys),
    evaluates them with RAGAS using an NVIDIA-hosted judge LLM and embedding
    model, then writes per-row scores to ``<ev_result_path>.parquet`` and the
    aggregate scores to ``<ev_result_path>.json``.

    Args:
        ev_file_path: Path to the JSON file containing generated answers.
        ev_result_path: Output path prefix; '.parquet' and '.json' are appended.
        llm_model: NVIDIA AI Endpoints model id used as the judge LLM.

    Raises:
        KeyError: If the NVIDIA_API_KEY environment variable is not set.
        Exception: Re-raised when the evaluation file cannot be read or parsed.
    """
    llm_params = {
        "temperature": 0.1,
        "max_tokens": 200,
        "top_p": 1.0,
        "stream": False,
        "nvidia_api_key": os.environ["NVIDIA_API_KEY"],
        "model": llm_model,
    }
    llm = ChatNVIDIA(**llm_params)
    nvpl_llm = LangchainLLMWrapper(langchain_llm=llm)
    embeddings = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="passage")
    nvpl_embeddings = LangchainEmbeddingsWrapper(embeddings)

    try:
        with open(ev_file_path, "r", encoding="utf-8") as file:
            json_data = json.load(file)
    except Exception as e:
        # Bug fix: previously this only logged and fell through, producing a
        # confusing NameError on json_data below. Fail fast with the real cause.
        logger.error(f"Error Occured while loading file : {e}")
        raise

    data_samples = {
        'question': [entry["question"] for entry in json_data],
        'answer': [entry["generated_answer"] for entry in json_data],
        'contexts': [entry["retrieved_context"] for entry in json_data],
        'ground_truth': [entry["ground_truth_answer"] for entry in json_data],
    }
    dataset = Dataset.from_dict(data_samples)

    result = evaluate(
        dataset,
        # Bug fix: the LangchainLLMWrapper was built but never used; pass the
        # ragas-compatible wrapper instead of the raw ChatNVIDIA client.
        llm=nvpl_llm,
        embeddings=nvpl_embeddings,
        metrics=[
            answer_similarity,
            faithfulness,
            context_precision,
            context_relevancy,
            answer_relevancy,
            context_recall,
        ],
    )
    df = result.to_pandas()
    df['ragas_score'] = df.apply(calculate_ragas_score, axis=1)
    df.to_parquet(ev_result_path + '.parquet')
    # Aggregate score mirrors calculate_ragas_score: harmonic mean of the four
    # dataset-level metrics (answer_similarity/context_precision excluded).
    result['ragas_score'] = statistics.harmonic_mean(
        [result['faithfulness'], result['context_relevancy'],
         result['answer_relevancy'], result['context_recall']]
    )
    with open(ev_result_path + '.json', "w", encoding="utf-8") as json_file:
        json.dump(result, json_file, indent=2)

    logger.info(f"Results written to {ev_result_path}.json and {ev_result_path}.parquet")
159
def eval_llm_judge(
    ev_file_path,
    ev_result_path,
    llm_model='ai-mixtral-8x7b-instruct'
):
    """Rate generated answers on a 1-5 Likert scale using an LLM judge.

    For every entry in the evaluation file the judge LLM is prompted (via
    LLAMA_PROMPT_TEMPLATE) with the question, ground-truth context/answer and
    the generated answer, and must return a JSON object with "Rating" and
    "Explanation". The per-item results and the mean rating are logged, and
    (rating, explanation, question, answer, ground truth answer, ground truth
    context) tuples are dumped as JSON to ev_result_path.

    Args:
        ev_file_path: Path to the JSON file with entries containing "question",
            "answer", "ground_truth_answer" and "ground_truth_context".
        ev_result_path: Path of the JSON results file to write.
        llm_model: NVIDIA AI Endpoints model id used as the judge.

    Raises:
        KeyError: If the NVIDIA_API_KEY environment variable is not set.
        Exception: Re-raised when the evaluation file cannot be read or parsed.
    """
    llm_params = {
        "temperature": 0.1,
        "max_tokens": 200,
        "top_p": 1.0,
        "stream": False,
        "nvidia_api_key": os.environ["NVIDIA_API_KEY"],
        "model": llm_model,
    }
    # Bug fix: llm_params were built but never passed, so the model id, API key
    # and sampling settings were silently ignored by ChatNVIDIA().
    llm = ChatNVIDIA(**llm_params)

    # Read the JSON file
    try:
        with open(ev_file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
    except Exception as e:
        # Bug fix: previously only logged, leading to a NameError on `data`.
        logger.error(f"Error Occured while loading file : {e}")
        raise

    llama_ratings = []
    llama_explanations = []
    for d in data:
        try:
            context = LLAMA_PROMPT_TEMPLATE.format(
                system_prompt=SYS_PROMPT,
                question=d["question"],
                ctx_ref=d["ground_truth_context"],
                answer_ref=d["ground_truth_answer"],
                answer=d["answer"],
            )

            response = llm.invoke(context)
            response_body = json.loads(response.content)
            rating = response_body["Rating"]
            # Bug fix: key was misspelled "Explanantion"; the prompt instructs
            # the judge to emit "Explanation", so the old lookup always raised
            # KeyError and sent every item down the except branch.
            explanation = response_body["Explanation"]
            llama_ratings.append(rating)
            llama_explanations.append(explanation)
            logger.info(f"progress: {len(llama_explanations)}/{len(data)}")
        except Exception as e:
            logger.info(f"Exception Occured: {e}")
            llama_ratings.append(None)
            # Bug fix: keep both lists the same length; otherwise the zip()
            # below silently truncated and misaligned ratings with items.
            llama_explanations.append(None)

    logger.info(f"Number of judgements: {len(llama_ratings)}")

    llama_ratings = [1 if r == 0 else r for r in llama_ratings]  # Change 0 ratings to 1
    llama_ratings_filtered = [r for r in llama_ratings if r]  # Remove empty ratings

    # NOTE: raises statistics.StatisticsError if every judgement failed.
    mean = round(statistics.mean(llama_ratings_filtered), 1)
    logger.info(f"Number of ratings: {len(llama_ratings_filtered)}")
    logger.info(f"Mean rating: {mean}")

    results = list(
        zip(
            llama_ratings,
            llama_explanations,
            [d["question"] for d in data],
            [d["answer"] for d in data],
            [d["ground_truth_answer"] for d in data],
            [d["ground_truth_context"] for d in data],
        )
    )

    with open(ev_result_path, "w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=2)

    logger.info(f"Results written to {ev_result_path}")