milvus-io_bootcamp

evaluate_fiqa_customized_RAG.ipynb
487 строк · 13.2 Кб
Перенос по словам
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "source": [
6
    "# Use Ragas to evaluate the customized RAG pipeline based on milvus\n",
7
    "\n",
8
    "**Please note that this test consumes a large number of OpenAI API tokens. Please read it carefully and pay attention to the number of requests you make.**"
9
   ],
10
   "metadata": {
11
    "collapsed": false,
12
    "pycharm": {
13
     "name": "#%% md\n"
14
    }
15
   }
16
  },
17
  {
18
   "cell_type": "markdown",
19
   "source": [
20
    "## 1. Prepare environment and data\n",
21
    "\n",
22
    "Before starting, you must set OPENAI_API_KEY in your environment variables."
23
   ],
24
   "metadata": {
25
    "collapsed": false,
26
    "pycharm": {
27
     "name": "#%% md\n"
28
    }
29
   }
30
  },
31
  {
32
   "cell_type": "markdown",
33
   "source": [
34
    "You also need to install and start [Milvus](https://milvus.io/). Refer to the [official installation guide](https://milvus.io/docs/install_standalone-docker.md) for a quick start."
35
   ],
36
   "metadata": {
37
    "collapsed": false,
38
    "pycharm": {
39
     "name": "#%% md\n"
40
    }
41
   }
42
  },
43
  {
44
   "cell_type": "markdown",
45
   "source": [
46
    "Install pip dependencies"
47
   ],
48
   "metadata": {
49
    "collapsed": false,
50
    "pycharm": {
51
     "name": "#%% md\n"
52
    }
53
   }
54
  },
55
  {
56
   "cell_type": "code",
57
   "execution_count": null,
58
   "outputs": [],
59
   "source": [
60
    "# ! python -m pip install openai beir pandas ragas==0.0.17 pymilvus"
61
   ],
62
   "metadata": {
63
    "collapsed": false,
64
    "pycharm": {
65
     "name": "#%%\n"
66
    }
67
   }
68
  },
69
  {
70
   "cell_type": "markdown",
71
   "source": [
72
    "Download the [Financial Opinion Mining and Question Answering (FiQA) dataset](https://sites.google.com/view/fiqa/) if it does not already exist locally. We convert it into a Ragas-friendly format that is easier to process, following this [script](https://github.com/explodinggradients/ragas/blob/main/experiments/baselines/fiqa/dataset-exploration-and-baseline.ipynb)."
73
   ],
74
   "metadata": {
75
    "collapsed": false,
76
    "pycharm": {
77
     "name": "#%% md\n"
78
    }
79
   }
80
  },
81
  {
82
   "cell_type": "code",
83
   "execution_count": 1,
84
   "outputs": [
85
    {
86
     "name": "stdout",
87
     "output_type": "stream",
88
     "text": [
89
      "1706\n"
90
     ]
91
    }
92
   ],
93
   "source": [
94
    "import json\n",
95
    "import pandas as pd\n",
96
    "import os\n",
97
    "from tqdm import tqdm\n",
98
    "from datasets import Dataset\n",
99
    "from beir import util\n",
100
    "\n",
101
    "\n",
102
    "def prepare_fiqa_without_answer(knowledge_path):\n",
103
    "    \"\"\"Download FiQA (if needed) and build per-split question/ground-truth tables.\n",
104
    "\n",
105
    "    Args:\n",
106
    "        knowledge_path: Directory that holds (or will receive) ``fiqa.zip`` and\n",
107
    "            the extracted ``fiqa`` folder.\n",
108
    "\n",
109
    "    Returns:\n",
110
    "        dict mapping each split name (\"dev\", \"test\", \"train\") to a DataFrame with\n",
111
    "        one row per query and the columns ``question`` and ``ground_truths``\n",
112
    "        (the list of corpus texts that the split's qrels file links to the query).\n",
113
    "    \"\"\"\n",
114
    "    dataset_name = \"fiqa\"\n",
115
    "\n",
116
    "    # The archive's presence is what marks the dataset as already downloaded.\n",
117
    "    if not os.path.exists(os.path.join(knowledge_path, f'{dataset_name}.zip')):\n",
118
    "        url = \"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip\".format(\n",
119
    "            dataset_name\n",
120
    "        )\n",
121
    "        util.download_and_unzip(url, knowledge_path)\n",
122
    "\n",
123
    "    data_path = os.path.join(knowledge_path, dataset_name)\n",
124
    "\n",
125
    "    def read_jsonl(file_name):\n",
126
    "        # Decode explicitly as UTF-8 so parsing does not depend on the platform's\n",
127
    "        # default encoding; blank lines are skipped instead of crashing json.loads.\n",
128
    "        with open(os.path.join(data_path, file_name), encoding=\"utf-8\") as f:\n",
129
    "            return pd.DataFrame([json.loads(line) for line in f if line.strip()])\n",
130
    "\n",
131
    "    corpus_df = (\n",
132
    "        read_jsonl(\"corpus.jsonl\")\n",
133
    "        .rename(columns={\"_id\": \"corpus-id\", \"text\": \"ground_truth\"})\n",
134
    "        .drop(columns=[\"title\", \"metadata\"])\n",
135
    "        .astype({\"corpus-id\": int})\n",
136
    "    )\n",
137
    "    queries_df = (\n",
138
    "        read_jsonl(\"queries.jsonl\")\n",
139
    "        .rename(columns={\"_id\": \"query-id\", \"text\": \"question\"})\n",
140
    "        .drop(columns=[\"metadata\"])\n",
141
    "        .astype({\"query-id\": int})\n",
142
    "    )\n",
143
    "\n",
144
    "    final_split_df = {}\n",
145
    "    for split in [\"dev\", \"test\", \"train\"]:\n",
146
    "        qrels_df = pd.read_csv(\n",
147
    "            os.path.join(data_path, f\"qrels/{split}.tsv\"), sep=\"\\t\"\n",
148
    "        ).drop(columns=[\"score\"])\n",
149
    "        merged_df = queries_df.merge(qrels_df, on=\"query-id\").merge(corpus_df, on=\"corpus-id\")\n",
150
    "        # Every row of a query group carries the same question text, so take the\n",
151
    "        # first one (no random sampling) and collect all linked corpus texts.\n",
152
    "        final_split_df[split] = (\n",
153
    "            merged_df.groupby(\"query-id\")\n",
154
    "            .agg(question=(\"question\", \"first\"), ground_truths=(\"ground_truth\", list))\n",
155
    "            .reset_index(drop=True)\n",
156
    "        )\n",
157
    "\n",
158
    "    return final_split_df\n",
159
    "\n",
160
    "\n",
161
    "knowledge_datas_path = './knowledge_datas'\n",
162
    "fiqa_path = os.path.join(knowledge_datas_path, 'fiqa_doc.txt')\n",
163
    "split = 'test'  # the split used for both the knowledge file and the evaluation set\n",
164
    "\n",
165
    "# exist_ok=True keeps this cell re-runnable and avoids the check-then-create race.\n",
166
    "os.makedirs(knowledge_datas_path, exist_ok=True)\n",
167
    "\n",
168
    "final_split_df = prepare_fiqa_without_answer(knowledge_datas_path)\n",
169
    "\n",
170
    "# Flatten the per-question ground-truth lists into one flat list of passages.\n",
171
    "docs = []\n",
172
    "for ds in final_split_df[split][\"ground_truths\"]:\n",
173
    "    docs.extend(ds)\n",
174
    "print(len(docs))\n",
175
    "\n",
176
    "# Persist the passages, newline-joined, as the knowledge file that is loaded\n",
177
    "# and chunked later in the notebook.\n",
178
    "docs_str = '\\n'.join(docs)\n",
179
    "with open(fiqa_path, 'w') as f:\n",
180
    "    f.write(docs_str)\n",
181
    "\n",
182
    "# Questions and ground truths come from the same frame, so they stay index-aligned.\n",
183
    "question_list = final_split_df[split][\"question\"].to_list()\n",
184
    "ground_truth_list = final_split_df[split][\"ground_truths\"].to_list()"
185
   ],
186
   "metadata": {
187
    "collapsed": false,
188
    "pycharm": {
189
     "name": "#%%\n"
190
    }
191
   }
192
  },
193
  {
194
   "cell_type": "markdown",
195
   "source": [
196
    "Now we have the question list and the ground-truth list, and the knowledge documents have been written to `fiqa_path`."
197
   ],
198
   "metadata": {
199
    "collapsed": false,
200
    "pycharm": {
201
     "name": "#%% md\n"
202
    }
203
   }
204
  },
205
  {
206
   "cell_type": "markdown",
207
   "source": [
208
    "## 2. Build RAG pipeline based on milvus and langchain\n",
209
    "Split the doc using langchain RecursiveCharacterTextSplitter."
210
   ],
211
   "metadata": {
212
    "collapsed": false,
213
    "pycharm": {
214
     "name": "#%% md\n"
215
    }
216
   }
217
  },
218
  {
219
   "cell_type": "code",
220
   "execution_count": 2,
221
   "outputs": [],
222
   "source": [
223
    "from langchain.document_loaders import TextLoader\n",
224
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
225
    "\n",
226
    "loader = TextLoader(fiqa_path)  # the knowledge file written while preparing the data\n",
227
    "documents = loader.load()\n",
228
    "text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=40, chunk_size=1000)\n",
229
    "docs = text_splitter.split_documents(documents)  # rebinds `docs` from raw strings to split chunks"
230
   ],
231
   "metadata": {
232
    "collapsed": false,
233
    "pycharm": {
234
     "name": "#%%\n"
235
    }
236
   }
237
  },
238
  {
239
   "cell_type": "markdown",
240
   "source": [
241
    "Prepare embedding model and milvus settings."
242
   ],
243
   "metadata": {
244
    "collapsed": false,
245
    "pycharm": {
246
     "name": "#%% md\n"
247
    }
248
   }
249
  },
250
  {
251
   "cell_type": "code",
252
   "execution_count": 3,
253
   "outputs": [
254
    {
255
     "data": {
256
      "text/plain": "Batches:   0%|          | 0/77 [00:00<?, ?it/s]",
257
      "application/vnd.jupyter.widget-view+json": {
258
       "version_major": 2,
259
       "version_minor": 0,
260
       "model_id": "d8eed54919ba4be49c14c2f8899d8276"
261
      }
262
     },
263
     "metadata": {},
264
     "output_type": "display_data"
265
    }
266
   ],
267
   "source": [
268
    "from langchain.embeddings import HuggingFaceEmbeddings\n",
269
    "from langchain.vectorstores.milvus import Milvus\n",
270
    "\n",
271
    "# Embedding model; it is handed to the vector store together with the chunks.\n",
272
    "embeddings = HuggingFaceEmbeddings(model_name=\"BAAI/bge-base-en\")\n",
273
    "vector_db = Milvus.from_documents(\n",
274
    "    docs,\n",
275
    "    embeddings,\n",
276
    "    drop_old=True,  # NOTE(review): presumably discards an existing collection first - confirm\n",
277
    "    connection_args={\"host\": \"127.0.0.1\", \"port\": \"19530\"},\n",
278
    ")"
279
   ],
280
   "metadata": {
281
    "collapsed": false,
282
    "pycharm": {
283
     "name": "#%%\n"
284
    }
285
   }
286
  },
287
  {
288
   "cell_type": "markdown",
289
   "source": [
290
    "Build agent using langchain."
291
   ],
292
   "metadata": {
293
    "collapsed": false,
294
    "pycharm": {
295
     "name": "#%% md\n"
296
    }
297
   }
298
  },
299
  {
300
   "cell_type": "code",
301
   "execution_count": 4,
302
   "outputs": [],
303
   "source": [
304
    "def search_milvus(question, top_k=5):\n",
305
    "    \"\"\"Return at most ``top_k`` results of a ``vector_db`` similarity search for ``question``.\"\"\"\n",
306
    "    return vector_db.similarity_search(question, k=top_k)[:top_k]"
307
   ],
308
   "metadata": {
309
    "collapsed": false,
310
    "pycharm": {
311
     "name": "#%%\n"
312
    }
313
   }
314
  },
315
  {
316
   "cell_type": "code",
317
   "execution_count": 5,
318
   "outputs": [],
319
   "source": [
320
    "from langchain.tools import Tool\n",
321
    "from langchain.memory import ConversationBufferMemory\n",
322
    "from langchain.chat_models import ChatOpenAI\n",
323
    "from langchain.agents import AgentExecutor, ConversationalChatAgent\n",
324
    "\n",
325
    "chat_llm = ChatOpenAI(model_name='gpt-4-1106-preview')\n",
326
    "\n",
327
    "# The agent gets a single tool: the search_milvus similarity search.\n",
328
    "tools = [\n",
329
    "    Tool(\n",
330
    "        func=search_milvus,\n",
331
    "        name='Search',\n",
332
    "        description='useful for search professional knowledge and information',\n",
333
    "    ),\n",
334
    "]\n",
335
    "agent = ConversationalChatAgent.from_llm_and_tools(tools=tools, llm=chat_llm)\n",
336
    "\n",
337
    "# NOTE(review): output_key='output' presumably tells the memory which result key to\n",
338
    "# store, since the executor below also returns intermediate steps - confirm.\n",
339
    "memory = ConversationBufferMemory(output_key='output', memory_key=\"chat_history\", return_messages=True)\n",
340
    "agent_chain = AgentExecutor.from_agent_and_tools(\n",
341
    "    agent=agent, tools=tools, memory=memory,\n",
342
    "    return_intermediate_steps=True)  # add verbose=True here to get verbose output"
343
   ],
344
   "metadata": {
345
    "collapsed": false,
346
    "pycharm": {
347
     "name": "#%%\n"
348
    }
349
   }
350
  },
351
  {
352
   "cell_type": "code",
353
   "execution_count": 8,
354
   "outputs": [],
355
   "source": [
356
    "import time\n",
357
    "\n",
358
    "\n",
359
    "def retry_agent_chain(retry_num=4, retry_interval=4, question=None):\n",
360
    "    \"\"\"Ask the agent one question, retrying on any error, and return ``(answer, contexts)``.\n",
361
    "\n",
362
    "    ``question`` defaults to the notebook-level ``question`` variable. ``contexts`` holds the page\n",
363
    "    contents from the first intermediate step; if every attempt fails both values are placeholders.\n",
364
    "    \"\"\"\n",
365
    "    for attempt in range(retry_num):\n",
366
    "        try:\n",
367
    "            agent_result = agent_chain(globals()['question'] if question is None else question)\n",
368
    "            contexts = [document.page_content for document in agent_result['intermediate_steps'][0][1]]\n",
369
    "            return agent_result['output'], contexts\n",
370
    "        except Exception as e:\n",
371
    "            print(f'{e}\\nfailed, retry...')\n",
372
    "            time.sleep(retry_interval if attempt < retry_num - 1 else 0)  # no wait after the last try\n",
373
    "    return 'failed. please retry.', ['failed. please retry.']"
374
   ],
375
   "metadata": {
376
    "collapsed": false,
377
    "pycharm": {
378
     "name": "#%%\n"
379
    }
380
   }
381
  },
382
  {
383
   "cell_type": "markdown",
384
   "source": [
385
    "## 3. Start Ragas Evaluation\n",
386
    "\n",
387
    "Note that this step consumes a large number of OpenAI API tokens: every question you ask and every evaluation sends a request to the OpenAI service. Please keep an eye on your token consumption. If you only want to run a small number of tests, you can modify the code to reduce the test size."
388
   ],
389
   "metadata": {
390
    "collapsed": false,
391
    "pycharm": {
392
     "name": "#%% md\n"
393
    }
394
   }
395
  },
396
  {
397
   "cell_type": "code",
398
   "execution_count": null,
399
   "outputs": [],
400
   "source": [
401
    "answer_list, contexts_list = [], []\n",
402
    "# `question` must stay a notebook-level name: retry_agent_chain() reads it from the global scope.\n",
403
    "for question in tqdm(question_list):\n",
404
    "    memory.clear()  # start every question with an empty chat history\n",
405
    "    answer, contexts = retry_agent_chain()\n",
406
    "    answer_list.append(answer)\n",
407
    "    contexts_list.append(contexts)\n",
408
    "    # Uncomment to inspect each result:\n",
409
    "    # print(f'answer = {answer}')\n",
410
    "    # print(f'contexts = {contexts}')"
411
   ],
412
   "metadata": {
413
    "collapsed": false,
414
    "pycharm": {
415
     "name": "#%%\n"
416
    }
417
   }
418
  },
419
  {
420
   "cell_type": "markdown",
421
   "source": [
422
    "Choose the metrics you care about for the evaluation."
423
   ],
424
   "metadata": {
425
    "collapsed": false,
426
    "pycharm": {
427
     "name": "#%% md\n"
428
    }
429
   }
430
  },
431
  {
432
   "cell_type": "code",
433
   "execution_count": null,
434
   "outputs": [],
435
   "source": [
436
    "from ragas import evaluate\n",
437
    "from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision, answer_similarity\n",
438
    "\n",
439
    "ds = Dataset.from_dict(\n",
440
    "    {\n",
441
    "        \"question\": question_list,\n",
442
    "        \"contexts\": contexts_list,\n",
443
    "        \"answer\": answer_list,\n",
444
    "        \"ground_truths\": ground_truth_list,\n",
445
    "    }\n",
446
    ")\n",
447
    "\n",
448
    "result = evaluate(\n",
449
    "    ds,\n",
450
    "    metrics=[\n",
451
    "        context_precision,\n",
452
    "        # Optional extras: context_recall, faithfulness, answer_relevancy, answer_similarity.\n",
453
    "        # answer_correctness is not imported in this cell - import it first (if your ragas version has it).\n",
454
    "    ],\n",
455
    ")\n",
456
    "print(result)"
457
   ],
458
   "metadata": {
459
    "collapsed": false,
460
    "pycharm": {
461
     "name": "#%%\n"
462
    }
463
   }
464
  }
465
 ],
466
 "metadata": {
467
  "kernelspec": {
468
   "display_name": "Python 3",
469
   "language": "python",
470
   "name": "python3"
471
  },
472
  "language_info": {
473
   "codemirror_mode": {
474
    "name": "ipython",
475
    "version": 2
476
   },
477
   "file_extension": ".py",
478
   "mimetype": "text/x-python",
479
   "name": "python",
480
   "nbconvert_exporter": "python",
481
   "pygments_lexer": "ipython2",
482
   "version": "2.7.6"
483
  }
484
 },
485
 "nbformat": 4,
486
 "nbformat_minor": 0
487
}
milvus-io_bootcamp

Использование cookies