milvus-io_bootcamp
447 lines · 12.9 KB
1{
2"cells": [
3{
4"cell_type": "markdown",
5"source": [
6"# Use Ragas to evaluate the OpenAI Assistant\n",
7"\n",
"**Please note that this test consumes a large number of OpenAI API tokens. Please read it carefully and pay attention to how many requests you make.**"
9],
10"metadata": {
11"collapsed": false,
12"pycharm": {
13"name": "#%% md\n"
14}
15}
16},
17{
18"cell_type": "markdown",
19"source": [
20"## 1. Prepare environment and data\n",
21"\n",
22"Before starting, you must set OPENAI_API_KEY in your environment variables."
23],
24"metadata": {
25"collapsed": false,
26"pycharm": {
27"name": "#%% md\n"
28}
29}
30},
31{
32"cell_type": "markdown",
33"source": [
34"Install pip dependencies"
35],
36"metadata": {
37"collapsed": false,
38"pycharm": {
39"name": "#%% md\n"
40}
41}
42},
43{
44"cell_type": "code",
45"execution_count": null,
46"outputs": [],
47"source": [
48"# ! python -m pip install openai beir pandas ragas==0.0.17"
49],
50"metadata": {
51"collapsed": false,
52"pycharm": {
53"name": "#%%\n"
54}
55}
56},
57{
58"cell_type": "markdown",
59"source": [
"Download the [Financial Opinion Mining and Question Answering (fiqa) Dataset](https://sites.google.com/view/fiqa/) data if it does not already exist locally. We convert it into a ragas-friendly form, following this [script](https://github.com/explodinggradients/ragas/blob/main/experiments/baselines/fiqa/dataset-exploration-and-baseline.ipynb)."
61],
62"metadata": {
63"collapsed": false,
64"pycharm": {
65"name": "#%% md\n"
66}
67}
68},
69{
70"cell_type": "code",
71"execution_count": 1,
72"outputs": [
73{
74"name": "stdout",
75"output_type": "stream",
76"text": [
77"1706\n"
78]
79}
80],
81"source": [
82"import json\n",
83"import pandas as pd\n",
84"import os\n",
85"from tqdm import tqdm\n",
86"from datasets import Dataset\n",
87"from beir import util\n",
88"\n",
89"\n",
"def prepare_fiqa_without_answer(knowledge_path):\n",
"    \"\"\"Download the BEIR fiqa dataset (if needed) and build per-split frames.\n",
"\n",
"    Returns a dict mapping split name ('dev'/'test'/'train') to a DataFrame\n",
"    with columns 'question' and 'ground_truths' (list of relevant passages).\n",
"    \"\"\"\n",
"    dataset_name = \"fiqa\"\n",
"\n",
"    # Download and unzip only when the archive is not already present.\n",
"    if not os.path.exists(os.path.join(knowledge_path, f'{dataset_name}.zip')):\n",
"        url = (\n",
"            \"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip\".format(\n",
"                dataset_name\n",
"            )\n",
"        )\n",
"        util.download_and_unzip(url, knowledge_path)\n",
"\n",
"    data_path = os.path.join(knowledge_path, 'fiqa')\n",
"    # Corpus: one JSON object per line with '_id', 'title', 'text', 'metadata'.\n",
"    with open(os.path.join(data_path, \"corpus.jsonl\")) as f:\n",
"        cs = [pd.Series(json.loads(l)) for l in f.readlines()]\n",
"\n",
"    corpus_df = pd.DataFrame(cs)\n",
"    corpus_df = corpus_df.rename(columns={\"_id\": \"corpus-id\", \"text\": \"ground_truth\"})\n",
"    corpus_df = corpus_df.drop(columns=[\"title\", \"metadata\"])\n",
"    corpus_df[\"corpus-id\"] = corpus_df[\"corpus-id\"].astype(int)\n",
"\n",
"    # Queries: one JSON object per line with '_id', 'text', 'metadata'.\n",
"    with open(os.path.join(data_path, \"queries.jsonl\")) as f:\n",
"        qs = [pd.Series(json.loads(l)) for l in f.readlines()]\n",
"\n",
"    queries_df = pd.DataFrame(qs)\n",
"    queries_df = queries_df.rename(columns={\"_id\": \"query-id\", \"text\": \"question\"})\n",
"    queries_df = queries_df.drop(columns=[\"metadata\"])\n",
"    queries_df[\"query-id\"] = queries_df[\"query-id\"].astype(int)\n",
"\n",
"    # qrels map query-id -> relevant corpus-id per split; 'score' is unused.\n",
"    splits = [\"dev\", \"test\", \"train\"]\n",
"    split_df = {}\n",
"    for s in splits:\n",
"        split_df[s] = pd.read_csv(os.path.join(data_path, f\"qrels/{s}.tsv\"), sep=\"\\t\").drop(\n",
"            columns=[\"score\"]\n",
"        )\n",
"\n",
"    final_split_df = {}\n",
"    for split in split_df:\n",
"        df = queries_df.merge(split_df[split], on=\"query-id\")\n",
"        df = df.merge(corpus_df, on=\"corpus-id\")\n",
"        df = df.drop(columns=[\"corpus-id\"])\n",
"        # Every row of a query-id group shares the same question text, so take\n",
"        # the first one deterministically (the original used a random .sample()).\n",
"        grouped = df.groupby(\"query-id\").apply(\n",
"            lambda x: pd.Series(\n",
"                {\n",
"                    \"question\": x[\"question\"].iloc[0],\n",
"                    \"ground_truths\": x[\"ground_truth\"].tolist(),\n",
"                }\n",
"            )\n",
"        )\n",
"\n",
"        grouped = grouped.reset_index()\n",
"        grouped = grouped.drop(columns=\"query-id\")\n",
"        final_split_df[split] = grouped\n",
"\n",
"    return final_split_df\n",
147"\n",
148"\n",
"# Paths for the downloaded knowledge data and the flattened document file.\n",
"knowledge_datas_path = './knowledge_datas'\n",
"fiqa_path = os.path.join(knowledge_datas_path, 'fiqa_doc.txt')\n",
"\n",
"# makedirs(exist_ok=True) keeps the cell idempotent across re-runs\n",
"# (os.mkdir raised FileExistsError on a second run).\n",
"os.makedirs(knowledge_datas_path, exist_ok=True)\n",
"contexts_list = []\n",
"answer_list = []\n",
"\n",
"final_split_df = prepare_fiqa_without_answer(knowledge_datas_path)\n",
"\n",
"# Flatten every ground-truth passage of the test split into one document list.\n",
"split = 'test'\n",
"docs = []\n",
"for ds in final_split_df[split][\"ground_truths\"]:\n",
"    docs.extend(ds)\n",
"print(len(docs))\n",
"\n",
"# Write all passages to a single newline-separated knowledge file.\n",
"docs_str = '\\n'.join(docs)\n",
"with open(fiqa_path, 'w') as f:\n",
"    f.write(docs_str)\n",
"\n",
"question_list = final_split_df[split][\"question\"].to_list()\n",
"ground_truth_list = final_split_df[split][\"ground_truths\"].to_list()"
173],
174"metadata": {
175"collapsed": false,
176"pycharm": {
177"name": "#%%\n"
178}
179}
180},
181{
182"cell_type": "markdown",
183"source": [
184"Now we have the question list and the ground truth list. And the knowledge documents are prepared in `fiqa_path`.\n"
185],
186"metadata": {
187"collapsed": false,
188"pycharm": {
189"name": "#%% md\n"
190}
191}
192},
193{
194"cell_type": "markdown",
195"source": [
196"## 2. Building RAG using OpenAI assistant\n",
197"\n",
198"To get the context content from the annotations returned by Open AI."
199],
200"metadata": {
201"collapsed": false,
202"pycharm": {
203"name": "#%% md\n"
204}
205}
206},
207{
208"cell_type": "code",
209"execution_count": null,
210"outputs": [],
211"source": [
212"import time\n",
213"from openai import OpenAI\n",
214"\n",
"client = OpenAI()\n",
"\n",
"# OPENAI_API_KEY must be set in your environment variables; OpenAI() reads\n",
"# it automatically, and we assign it explicitly here as well.\n",
"client.api_key = os.getenv('OPENAI_API_KEY')\n",
"\n",
"\n",
"class OpenAITimeoutException(Exception):\n",
"    \"\"\"Raised when waiting for an assistant run exceeds the timeout.\"\"\"\n",
"    pass\n",
223"\n",
224"\n",
"def get_content_from_retrieved_message(message):\n",
"    \"\"\"Strip citation markers from an assistant message and collect quotes.\n",
"\n",
"    Returns a tuple (answer_text, contexts), where contexts is the list of\n",
"    cited file passages, or ['empty context.'] when nothing was cited.\n",
"    \"\"\"\n",
"    message_content = message.content[0].text\n",
"    annotations = message_content.annotations\n",
"    contexts = []\n",
"    for annotation in annotations:\n",
"        # Remove the inline citation marker from the answer text.\n",
"        message_content.value = message_content.value.replace(annotation.text, '')\n",
"        if (file_citation := getattr(annotation, 'file_citation', None)):\n",
"            contexts.append(file_citation.quote)\n",
"    if len(contexts) == 0:\n",
"        contexts = ['empty context.']\n",
"    return message_content.value, contexts\n",
237"\n",
238"\n",
"def try_get_answer_contexts(assistant_id, question, timeout_seconds=120):\n",
"    \"\"\"Ask the assistant one question and return (contexts, answer).\n",
"\n",
"    Creates a fresh thread, starts a run, polls until it completes, extracts\n",
"    the answer and cited contexts, then deletes the thread.\n",
"\n",
"    Raises OpenAITimeoutException when the run does not finish within\n",
"    timeout_seconds, so callers can retry specifically on timeout.\n",
"    \"\"\"\n",
"    thread = client.beta.threads.create(\n",
"        messages=[\n",
"            {\n",
"                \"role\": \"user\",\n",
"                \"content\": question,\n",
"            }\n",
"        ]\n",
"    )\n",
"    thread_id = thread.id\n",
"    run = client.beta.threads.runs.create(\n",
"        thread_id=thread_id,\n",
"        assistant_id=assistant_id,\n",
"    )\n",
"    start_time = time.time()\n",
"    while True:\n",
"        elapsed_time = time.time() - start_time\n",
"        if elapsed_time > timeout_seconds:\n",
"            # Raise the dedicated exception type so the retry wrapper can\n",
"            # distinguish timeouts from other failures (a bare Exception here\n",
"            # was never caught by the retry loop's except clause).\n",
"            raise OpenAITimeoutException(\"OpenAI retrieving answer Timeout!\")\n",
"\n",
"        run = client.beta.threads.runs.retrieve(\n",
"            thread_id=thread_id,\n",
"            run_id=run.id\n",
"        )\n",
"        if run.status == 'completed':\n",
"            break\n",
"        # Brief pause between polls to avoid hammering the API.\n",
"        time.sleep(1)\n",
"    messages = client.beta.threads.messages.list(\n",
"        thread_id=thread_id\n",
"    )\n",
"    assert len(messages.data) > 1\n",
"    # messages.data[0] is the most recent message, i.e. the assistant's answer.\n",
"    res, contexts = get_content_from_retrieved_message(messages.data[0])\n",
"    response = client.beta.threads.delete(thread_id)\n",
"    assert response.deleted is True\n",
"    return contexts, res\n",
273"\n",
274"\n",
"def get_answer_contexts_from_assistant(question, assistant_id, timeout_seconds=120, retry_num=6):\n",
"    \"\"\"Query the assistant, retrying up to retry_num times on timeout.\n",
"\n",
"    Returns (answer, contexts); on persistent failure both hold a\n",
"    'failed. please retry.' placeholder so the evaluation loop can proceed.\n",
"    \"\"\"\n",
"    res = 'failed. please retry.'\n",
"    contexts = ['failed. please retry.']\n",
"    try:\n",
"        for _ in range(retry_num):\n",
"            try:\n",
"                contexts, res = try_get_answer_contexts(assistant_id, question, timeout_seconds)\n",
"                break\n",
"            except OpenAITimeoutException:\n",
"                print('OpenAI retrieving answer Timeout, retry...')\n",
"                continue\n",
"    except Exception as e:\n",
"        # Best-effort: report unexpected errors but keep the placeholder\n",
"        # result so one failed question does not abort the whole run.\n",
"        print(e)\n",
"    return res, contexts"
289],
290"metadata": {
291"collapsed": false,
292"pycharm": {
293"name": "#%%\n"
294}
295}
296},
297{
298"cell_type": "markdown",
299"source": [
300"Build assistant and upload knowledge files."
301],
302"metadata": {
303"collapsed": false,
304"pycharm": {
305"name": "#%% md\n"
306}
307}
308},
309{
310"cell_type": "code",
311"execution_count": 3,
312"outputs": [
313{
314"name": "stderr",
315"output_type": "stream",
316"text": [
317" 0%| | 0/648 [03:45<?, ?it/s]\n",
318"\n",
319"KeyboardInterrupt\n",
320"\n"
321]
322}
323],
324"source": [
"# Upload the knowledge file; 'with' closes the handle promptly (the original\n",
"# open() leaked the file object).\n",
"with open(fiqa_path, \"rb\") as knowledge_file:\n",
"    file = client.files.create(\n",
"        file=knowledge_file,\n",
"        purpose='assistants'\n",
"    )\n",
"\n",
"# Create an assistant with the retrieval tool attached to the uploaded file.\n",
"assistant = client.beta.assistants.create(\n",
"    instructions=\"You are a customer support chatbot. You must use your retrieval tool to retrieve relevant knowledge to best respond to customer queries.\",\n",
"    model=\"gpt-4-1106-preview\",\n",
"    tools=[{\"type\": \"retrieval\"}],\n",
"    file_ids=[file.id]\n",
")"
337],
338"metadata": {
339"collapsed": false,
340"pycharm": {
341"name": "#%%\n"
342}
343}
344},
345{
346"cell_type": "markdown",
347"source": [
348"## 3. Start Ragas Evaluation\n",
349"\n",
"Note that this consumes a large number of OpenAI API tokens: every question asked and every evaluation calls the OpenAI service. Please watch your token consumption. If you only want to run a small number of tests, you can modify the code to reduce the test size."
351],
352"metadata": {
353"collapsed": false,
354"pycharm": {
355"name": "#%% md\n"
356}
357}
358},
359{
360"cell_type": "code",
361"execution_count": null,
362"outputs": [],
363"source": [
"# Ask the assistant every question in turn; each iteration costs API tokens.\n",
"for question in tqdm(question_list):\n",
"    answer, contexts = get_answer_contexts_from_assistant(question, assistant.id)\n",
"    answer_list.append(answer)\n",
"    contexts_list.append(contexts)"
371],
372"metadata": {
373"collapsed": false,
374"pycharm": {
375"name": "#%%\n"
376}
377}
378},
379{
380"cell_type": "markdown",
381"source": [
382"You can choose the indicators you care about to test.\n"
383],
384"metadata": {
385"collapsed": false,
386"pycharm": {
387"name": "#%% md\n"
388}
389}
390},
391{
392"cell_type": "code",
393"execution_count": null,
394"outputs": [],
395"source": [
"from ragas import evaluate\n",
"from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision, answer_similarity\n",
"\n",
"# Assemble the evaluation dataset in the shape ragas expects.\n",
"ds = Dataset.from_dict({\"question\": question_list,\n",
"                        \"contexts\": contexts_list,\n",
"                        \"answer\": answer_list,\n",
"                        \"ground_truths\": ground_truth_list})\n",
"\n",
"# Uncomment additional metrics as needed; each one costs extra API calls.\n",
"result = evaluate(\n",
"    ds,\n",
"    metrics=[\n",
"        context_precision,\n",
"        # context_recall,\n",
"        # faithfulness,\n",
"        # answer_relevancy,\n",
"        # answer_similarity,\n",
"    ],\n",
")\n",
"print(result)"
417],
418"metadata": {
419"collapsed": false,
420"pycharm": {
421"name": "#%%\n"
422}
423}
424}
425],
426"metadata": {
427"kernelspec": {
428"display_name": "Python 3",
429"language": "python",
430"name": "python3"
431},
432"language_info": {
433"codemirror_mode": {
434"name": "ipython",
435"version": 2
436},
437"file_extension": ".py",
438"mimetype": "text/x-python",
439"name": "python",
440"nbconvert_exporter": "python",
441"pygments_lexer": "ipython2",
442"version": "2.7.6"
443}
444},
445"nbformat": 4,
446"nbformat_minor": 0
447}