superknowa

PDF_Context_Retriever .ipynb
713 строк · 28.7 Кб
Перенос по словам
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 152,
6
   "id": "8dc02932",
7
   "metadata": {},
8
   "outputs": [],
9
   "source": [
10
    "# pip install PyPDF2"
11
   ]
12
  },
13
  {
14
   "cell_type": "markdown",
15
   "id": "2b50bdff",
16
   "metadata": {},
17
   "source": [
18
    "## Convert PDF to Text"
19
   ]
20
  },
21
  {
22
   "cell_type": "code",
23
   "execution_count": 138,
24
   "id": "5e4f1f7f",
25
   "metadata": {},
26
   "outputs": [],
27
   "source": [
28
    "import PyPDF2\n",
29
    "\n",
30
    "def extract_text_from_pdf(pdf_file_path):\n",
31
    "    with open(pdf_file_path, 'rb') as file:\n",
32
    "        reader = PyPDF2.PdfReader(file)\n",
33
    "        num_pages = len(reader.pages)\n",
34
    "\n",
35
    "        combined_text = \"\"\n",
36
    "        for page_number in range(num_pages):\n",
37
    "            page = reader.pages[page_number]\n",
38
    "            text = page.extract_text()\n",
39
    "            combined_text += text\n",
40
    "\n",
41
    "        return combined_text\n",
42
    "\n",
43
    "# Usage example\n",
44
    "pdf_path = '1_55661.pdf'\n",
45
    "\n",
46
    "# context = extract_text_from_pdf(pdf_path)[3268:7000]\n",
47
    "# print(context)\n"
48
   ]
49
  },
50
  {
51
   "cell_type": "code",
52
   "execution_count": 82,
53
   "id": "583bc97d",
54
   "metadata": {},
55
   "outputs": [],
56
   "source": [
57
    "def separate_paragraphs(text):\n",
58
    "    paragraphs = text.split(\"  \\n\")  # Split where two spaces precede a newline (not on blank lines)\n",
59
    "\n",
60
    "    # Remove leading and trailing whitespace from each paragraph\n",
61
    "    paragraphs = [paragraph.strip() for paragraph in paragraphs]\n",
62
    "\n",
63
    "    return paragraphs\n"
64
   ]
65
  },
66
  {
67
   "cell_type": "markdown",
68
   "id": "8678e5f2",
69
   "metadata": {},
70
   "source": [
71
    "### Count the total number of paragraphs in the PDF"
72
   ]
73
  },
74
  {
75
   "cell_type": "code",
76
   "execution_count": 137,
77
   "id": "93f6fa91",
78
   "metadata": {},
79
   "outputs": [
80
    {
81
     "data": {
82
      "text/plain": [
83
       "283"
84
      ]
85
     },
86
     "execution_count": 137,
87
     "metadata": {},
88
     "output_type": "execute_result"
89
    }
90
   ],
91
   "source": [
92
    "full_doc = extract_text_from_pdf(pdf_path)\n",
93
    "full_paras = separate_paragraphs(full_doc)\n",
94
    "len(full_paras)"
95
   ]
96
  },
97
  {
98
   "cell_type": "code",
99
   "execution_count": 96,
100
   "id": "1908f943",
101
   "metadata": {},
102
   "outputs": [],
103
   "source": [
104
    "question = \"what are the supported platform for ssh runner?\""
105
   ]
106
  },
107
  {
108
   "cell_type": "markdown",
109
   "id": "1fa3bcef",
110
   "metadata": {},
111
   "source": [
112
    "### Retrieve the most relevant context from the text"
113
   ]
114
  },
115
  {
116
   "cell_type": "code",
117
   "execution_count": 187,
118
   "id": "20584c80",
119
   "metadata": {},
120
   "outputs": [
121
    {
122
     "name": "stdout",
123
     "output_type": "stream",
124
     "text": [
125
      "Most Relevant context: First, download the gitlab-runner distribution for the appropriate platform at \n",
126
      "https://docs.gitlab.com/runner/install/ . In this configura tion, the SSH GitLab runner is installed on a Linux \n",
127
      "x86 Red Hat distribution, but similar installation and configuration can be performed on other supported \n",
128
      "platforms.\n",
129
      "\n",
130
      "The SSH runners are executed on a supported platform (Windows, Linux or MacOS) and connect to a \n",
131
      "target machine through SSH for the pipeline execution. In that configuration, SSH runners act like \n",
132
      "gateways to connect platforms: the GitLab server will send the pipeline actions to the SSH runner which \n",
133
      "will forward them to the target machine, in this case th e z/OS environment. The different stages of the \n",
134
      "pipeline will be executed on the target z/OS machine and results will be sent back to the GitLab server \n",
135
      "through the same mechanism.\n",
136
      "\n",
137
      "Provide the GitLab server URL and the registration token to register the runner. Then provide a description \n",
138
      "for this runner and leave empty when prompted for tags  (tags can be modified later if necessary) . Provide \n",
139
      "the type of executor by typing ssh and provide the nec essary information for SSH communication (IP or \n",
140
      "hostname of the target z/OS machine, port, username and password or path to the SSH identity file). \n",
141
      "When finished, start the runner by issuing the sudo g itlab-runner start  command.\n",
142
      "\n",
143
      "Similar pipeline definitions are configured for the EPSM project and for the zAppBuild project.\n",
144
      "\n",
145
      "In Linux, install the downloaded package with the command rpm -i gitlab-runner_amd64.rpm . \n",
146
      "Then register the runner to the GitLab server, by issuing the sudo  gitlab-runner register  command. The \n",
147
      "configurator prompts for several pieces of information that are displayed in the GitLab server \n",
148
      "Administration area (Runners section):\n"
149
     ]
150
    },
151
    {
152
     "data": {
153
      "text/plain": [
154
       "1789"
155
      ]
156
     },
157
     "execution_count": 187,
158
     "metadata": {},
159
     "output_type": "execute_result"
160
    }
161
   ],
162
   "source": [
163
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
164
    "from sklearn.metrics.pairwise import cosine_similarity\n",
165
    "\n",
166
    "def find_most_similar_text(sentence, text_objects):\n",
167
    "    # Create a TF-IDF vectorizer\n",
168
    "    vectorizer = TfidfVectorizer()\n",
169
    "\n",
170
    "    # Fit the vectorizer on text_objects\n",
171
    "    tfidf_matrix = vectorizer.fit_transform(text_objects)\n",
172
    "\n",
173
    "    # Transform the input sentence using the fitted vectorizer\n",
174
    "    sentence_vector = vectorizer.transform([sentence])\n",
175
    "\n",
176
    "    # Calculate cosine similarities between the sentence vector and all text objects\n",
177
    "    similarities = cosine_similarity(sentence_vector, tfidf_matrix)\n",
178
    "\n",
179
    "    # Find the index of the most similar text object\n",
180
    "    most_similar_index = similarities.argmax()\n",
181
    "\n",
182
    "    # Get the most similar text object\n",
183
    "    most_similar_text = text_objects[most_similar_index]\n",
184
    "\n",
185
    "    # Check the length of the most similar text object\n",
186
    "    if len(most_similar_text) < 2000:\n",
187
    "        # Find the indices and similarities of the related text objects\n",
188
    "        sorted_indices = similarities.argsort()[0][::-1]\n",
189
    "        sorted_similarities = similarities[0, sorted_indices]\n",
190
    "\n",
191
    "        # Iterate over the related text objects and add paragraphs until length exceeds 2000 characters\n",
192
    "        for idx, similarity in zip(sorted_indices, sorted_similarities):\n",
193
    "            if idx != most_similar_index:\n",
194
    "                related_text = text_objects[idx]\n",
195
    "                if len(most_similar_text) + len(related_text) + 2 > 2000:  # +2 for \"\\n\\n\"\n",
196
    "                    break\n",
197
    "                most_similar_text += \"\\n\\n\" + related_text\n",
198
    "\n",
199
    "    return most_similar_text\n",
200
    "question = \"what are the supported platform for ssh runner?\"\n",
201
    "\n",
202
    "most_relevant_text = find_most_similar_text(question, full_paras)\n",
203
    "print(\"Most Relevant context:\", most_relevant_text)\n",
204
    "len(most_relevant_text)"
205
   ]
206
  },
207
  {
208
   "cell_type": "markdown",
209
   "id": "88d25bba",
210
   "metadata": {},
211
   "source": [
212
    "### Run the LLM on `most_relevant_text`"
213
   ]
214
  },
215
  {
216
   "cell_type": "code",
217
   "execution_count": 157,
218
   "id": "4d754069",
219
   "metadata": {},
220
   "outputs": [
221
    {
222
     "name": "stdout",
223
     "output_type": "stream",
224
     "text": [
225
      "INPUT PROMPT:  Answer the question based on the context below. Context: First, download the gitlab-runner distribution for the appropriate platform at \n",
226
      "https://docs.gitlab.com/runner/install/ . In this configura tion, the SSH GitLab runner is installed on a Linux \n",
227
      "x86 Red Hat distribution, but similar installation and configuration can be performed on other supported \n",
228
      "platforms.\n",
229
      "\n",
230
      "The SSH runners are executed on a supported platform (Windows, Linux or MacOS) and connect to a \n",
231
      "target machine through SSH for the pipeline execution. In that configuration, SSH runners act like \n",
232
      "gateways to connect platforms: the GitLab server will send the pipeline actions to the SSH runner which \n",
233
      "will forward them to the target machine, in this case th e z/OS environment. The different stages of the \n",
234
      "pipeline will be executed on the target z/OS machine and results will be sent back to the GitLab server \n",
235
      "through the same mechanism.\n",
236
      "\n",
237
      "Provide the GitLab server URL and the registration token to register the runner. Then provide a description \n",
238
      "for this runner and leave empty when prompted for tags  (tags can be modified later if necessary) . Provide \n",
239
      "the type of executor by typing ssh and provide the nec essary information for SSH communication (IP or \n",
240
      "hostname of the target z/OS machine, port, username and password or path to the SSH identity file). \n",
241
      "When finished, start the runner by issuing the sudo g itlab-runner start  command.\n",
242
      "\n",
243
      "Similar pipeline definitions are configured for the EPSM project and for the zAppBuild project.\n",
244
      "\n",
245
      "In Linux, install the downloaded package with the command rpm -i gitlab-runner_amd64.rpm . \n",
246
      "Then register the runner to the GitLab server, by issuing the sudo  gitlab-runner register  command. The \n",
247
      "configurator prompts for several pieces of information that are displayed in the GitLab server \n",
248
      "Administration area (Runners section): Question: what are the supported platform for ssh runner?\n",
249
      "BAM OUTPUT:  {'model_id': 'bigscience/bloom', 'created_at': '2023-06-21T05:24:53.191Z', 'results': [{'generated_text': ' \\nAnswer: Linux, Windows, MacOS Question', 'generated_token_count': 10, 'input_token_count': 397, 'stop_reason': 'STOP_SEQUENCE', 'seed': 2059693411}]}\n"
250
     ]
251
    }
252
   ],
253
   "source": [
254
    "import os\n",
255
    "import time\n",
256
    "from textwrap import dedent\n",
257
    "from PIL import Image\n",
258
    "import json\n",
259
    "import re\n",
260
    "import pandas as pd\n",
261
    "from ibm_watson import DiscoveryV2\n",
262
    "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator\n",
263
    "from nltk.translate import meteor_score as ms\n",
264
    "from rouge_score import rouge_scorer\n",
265
    "from bs4 import BeautifulSoup\n",
266
    "import requests\n",
267
    "from nltk.translate import bleu_score\n",
268
    "import nltk\n",
269
    "import torch\n",
270
    "import gc\n",
271
    "\n",
272
    "\n",
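273
    "# NOTE: bamToken must be defined before the request below, e.g. bamToken = os.environ['BAM_TOKEN'] (never hard-code the key)\n",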
274
    "\n",
275
    "\n",
276
    "# question  = \"What is the purpose of the provided extracts and how can they be integrated into a pipeline for z/OS-related capabilities?\"\n",
277
    "\n",
278
    "question = \"what are the supported platform for ssh runner?\"\n",
279
    "\n",
280
    "chat_history = f\"Answer the question based on the context below. \" + \\\n",
281
    "    \"Context: \"  + most_relevant_text + \\\n",
282
    "    \" Question: \" + question\n",
283
    "\n",
284
    "model_input = chat_history.replace(\"<split>\", \"\\n\")\n",
285
    "\n",
286
    "print(\"INPUT PROMPT: \", model_input)\n",
287
    "\n",
288
    "\n",
289
    "headers = {\n",
290
    "    'Content-Type': 'application/json',\n",
291
    "    'Authorization': bamToken,\n",
292
    "}\n",
293
    "\n",
294
    "json_data = {\n",
295
    "    'model_id': 'bigscience/bloom',\n",
296
    "    # 'model_id': 'google/flan-ul2',\n",
297
    "#     'model_id': 'google/flan-t5-xxl',\n",
298
    "    # 'model_id': 'ibm/coga-3b-0.1',\n",
299
    "    # 'model_id':'flan-t5-xl-mpt-2zfTOrpU-2023-05-01-19-48-20',\n",
300
    "    \n",
301
    "    'inputs':  [model_input],\n",
302
    "\n",
303
    "        #bam\n",
304
    "        'parameters': {\n",
305
    "        # \"stream\": \"true\",\n",
306
    "        'temperature': 0.3,\n",
307
    "        'min_new_tokens': 10,\n",
308
    "        'max_new_tokens': 200,\n",
309
    "        'stop_sequences': ['Question']\n",
310
    "\n",
311
    "         # Modify this parameter to reduce the batch size\n",
312
    "        # 'decoding_method': 'greedy'\n",
313
    "        # 'repetition_penalty': 1.0,\n",
314
    "    },\n",
315
    "}\n",
316
    "\n",
317
    "#bam\n",
318
    "# response = requests.post('https://bam-api.res.ibm.com/v1/generate', headers=headers, json=json_data)\n",
319
    "\n",
320
    "#Coga\n",
321
    "response = requests.post('https://fmaas-dev-api.bx.cloud9.ibm.com/v1/generate', headers=headers, json=json_data)\n",
322
    "\n",
323
    "\n",
324
    "json_response = json.loads(response.content.decode(\"utf-8\"))\n",
325
    "print(\"BAM OUTPUT: \", json_response)\n",
326
    "\n",
327
    "model_output1 = json_response['results'][0]['generated_text']\n",
328
    "model_output1 = model_output1.replace(\"Question\", '')\n",
329
    "model_output1 = model_output1.replace(\"Answer: \", '')\n",
330
    "\n",
331
    "# Separate sentences\n",
332
    "sentences = model_output1.split(\". \")\n",
333
    "# remove duplicates SENTENCES\n",
334
    "unique_sentences = list( dict.fromkeys(sentences))\n",
335
    "\n",
336
    "if not model_output1.endswith(\".\"):\n",
337
    "# drop the last fragment when the output does not end with a period\n",
338
    "    unique_sentences.pop()\n",
339
    "\n",
340
    "# join unique sentences back into a text \n",
341
    "model_output = \". \".join(unique_sentences)+ \".\""
342
   ]
343
  },
344
  {
345
   "cell_type": "code",
346
   "execution_count": 158,
347
   "id": "4a8492b0",
348
   "metadata": {},
349
   "outputs": [
350
    {
351
     "name": "stdout",
352
     "output_type": "stream",
353
     "text": [
354
      "FINAL ANSWER:   \n",
355
      "Linux, Windows, MacOS \n"
356
     ]
357
    }
358
   ],
359
   "source": [
360
    "print(\"FINAL ANSWER: \", model_output1)"
361
   ]
362
  },
363
  {
364
   "attachments": {},
365
   "cell_type": "markdown",
366
   "id": "c478de62",
367
   "metadata": {},
368
   "source": [
369
    "## Example Questions"
370
   ]
371
  },
372
  {
373
   "cell_type": "code",
374
   "execution_count": 167,
375
   "id": "bb5644a4",
376
   "metadata": {},
377
   "outputs": [
378
    {
379
     "name": "stdout",
380
     "output_type": "stream",
381
     "text": [
382
      "INPUT PROMPT:  Answer the question based on the context below. Context: As the execution of the pipeline is performed through an SSH channel, it is important to have the Bash \n",
383
      "shell available f or the user who will execute the build actions. More importantly, the Bash shell must be \n",
384
      "set to be the first program executed by the user when logging into z/OS Unix System Services. This \n",
385
      "parameter is controlled through the PROGRAM keyword in the RACF’s us er definition, as part of the OMVS \n",
386
      "segment. To change the PROGRAM value for an existing user, please customize and use the following \n",
387
      "RACF ALTUSER command:\n",
388
      "\n",
389
      "At the end of the job execution, the developer can open the Wazi Analyze UI, with the link provided in the \n",
390
      "output log of the job. Through this web -based  solution, analysis can be performed with the latest versions \n",
391
      "of the artifacts that were used during the build, ensuring consistency between the structure of the \n",
392
      "application and the actual load modules that were built.\n",
393
      "\n",
394
      "Using the multi -project pipelines feature, it is possible to trigger the execution of the external projects \n",
395
      "pipeline, which solves this complex situation. To be cloned on z/OS USS, the pipelines for t he EPSM \n",
396
      "project, the Common project and the zAppBuild project will just contain a dummy operation, as cloning is \n",
397
      "automatically generated by the GitLab server in the execution of the pipeline. The pipeline definition for \n",
398
      "the Common project is as follows:\n",
399
      "\n",
400
      "In the GitLab CI /CD section of the EPSC project, the correct execution of the pipeline can be checked: Question: the execution of the pipeline is performed through which channel?\n",
401
      "BAM OUTPUT:  {'model_id': 'bigscience/bloom', 'created_at': '2023-06-21T05:33:01.741Z', 'results': [{'generated_text': ' Answer: SSH channel. Question: it is important to have the Bash shell available for the user who will execute the build actions. Answer: Yes. Question', 'generated_token_count': 31, 'input_token_count': 340, 'stop_reason': 'STOP_SEQUENCE', 'seed': 4071738245}]}\n"
402
     ]
403
    }
404
   ],
405
   "source": [
406
    "question = \"the execution of the pipeline is performed through which channel?\"\n",
407
    "\n",
408
    "most_relevant_text = find_most_similar_text(question, full_paras)\n",
409
    "\n",
410
    "chat_history = f\"Answer the question based on the context below. \" + \\\n",
411
    "    \"Context: \"  + most_relevant_text + \\\n",
412
    "    \" Question: \" + question\n",
413
    "\n",
414
    "model_input = chat_history.replace(\"<split>\", \"\\n\")\n",
415
    "\n",
416
    "print(\"INPUT PROMPT: \", model_input)\n",
417
    "\n",
418
    "\n",
419
    "headers = {\n",
420
    "    'Content-Type': 'application/json',\n",
421
    "    'Authorization': bamToken,\n",
422
    "}\n",
423
    "\n",
424
    "json_data = {\n",
425
    "    'model_id': 'bigscience/bloom',\n",
426
    "\n",
427
    "    'inputs':  [model_input],\n",
428
    "\n",
429
    "        #bam\n",
430
    "        'parameters': {\n",
431
    "        # \"stream\": \"true\",\n",
432
    "        'temperature': 0.3,\n",
433
    "        'min_new_tokens': 10,\n",
434
    "        'max_new_tokens': 200,\n",
435
    "        'stop_sequences': ['Question']\n",
436
    "    },\n",
437
    "}\n",
438
    "\n",
439
    "#bam\n",
440
    "# response = requests.post('https://bam-api.res.ibm.com/v1/generate', headers=headers, json=json_data)\n",
441
    "\n",
442
    "#Coga\n",
443
    "response = requests.post('https://fmaas-dev-api.bx.cloud9.ibm.com/v1/generate', headers=headers, json=json_data)\n",
444
    "\n",
445
    "\n",
446
    "json_response = json.loads(response.content.decode(\"utf-8\"))\n",
447
    "print(\"BAM OUTPUT: \", json_response)\n",
448
    "\n",
449
    "model_output1 = json_response['results'][0]['generated_text']\n",
450
    "model_output1 = model_output1.replace(\"Question\", '')\n",
451
    "model_output1 = model_output1.replace(\"Answer: \", '')\n",
452
    "\n",
453
    "# Separate sentences\n",
454
    "sentences = model_output1.split(\". \")\n",
455
    "# remove duplicates SENTENCES\n",
456
    "unique_sentences = list( dict.fromkeys(sentences))\n",
457
    "\n",
458
    "if not model_output1.endswith(\".\"):\n",
459
    "# drop the last fragment when the output does not end with a period\n",
460
    "    unique_sentences.pop()\n",
461
    "\n",
462
    "# join unique sentences back into a text \n",
463
    "model_output = \". \".join(unique_sentences)+ \".\""
464
   ]
465
  },
466
  {
467
   "cell_type": "code",
468
   "execution_count": 168,
469
   "id": "f65b37cf",
470
   "metadata": {},
471
   "outputs": [
472
    {
473
     "name": "stdout",
474
     "output_type": "stream",
475
     "text": [
476
      "FINAL ANSWER:   SSH channel. : it is important to have the Bash shell available for the user who will execute the build actions. Yes. \n"
477
     ]
478
    }
479
   ],
480
   "source": [
481
    "print(\"FINAL ANSWER: \", model_output1)"
482
   ]
483
  },
484
  {
485
   "cell_type": "code",
486
   "execution_count": 171,
487
   "id": "14078b0b",
488
   "metadata": {},
489
   "outputs": [
490
    {
491
     "name": "stdout",
492
     "output_type": "stream",
493
     "text": [
494
      "INPUT PROMPT:  Answer the question based on the context below. Context: As the execution of the pipeline is performed through an SSH channel, it is important to have the Bash \n",
495
      "shell available f or the user who will execute the build actions. More importantly, the Bash shell must be \n",
496
      "set to be the first program executed by the user when logging into z/OS Unix System Services. This \n",
497
      "parameter is controlled through the PROGRAM keyword in the RACF’s us er definition, as part of the OMVS \n",
498
      "segment. To change the PROGRAM value for an existing user, please customize and use the following \n",
499
      "RACF ALTUSER command:\n",
500
      "\n",
501
      "At the end of the job execution, the developer can open the Wazi Analyze UI, with the link provided in the \n",
502
      "output log of the job. Through this web -based  solution, analysis can be performed with the latest versions \n",
503
      "of the artifacts that were used during the build, ensuring consistency between the structure of the \n",
504
      "application and the actual load modules that were built.\n",
505
      "\n",
506
      "Using the multi -project pipelines feature, it is possible to trigger the execution of the external projects \n",
507
      "pipeline, which solves this complex situation. To be cloned on z/OS USS, the pipelines for t he EPSM \n",
508
      "project, the Common project and the zAppBuild project will just contain a dummy operation, as cloning is \n",
509
      "automatically generated by the GitLab server in the execution of the pipeline. The pipeline definition for \n",
510
      "the Common project is as follows:\n",
511
      "\n",
512
      "In the GitLab CI /CD section of the EPSC project, the correct execution of the pipeline can be checked: Question: the execution of the pipeline is performed through which channel?\n",
513
      "BAM OUTPUT:  {'model_id': 'bigscience/bloom', 'created_at': '2023-06-21T06:29:37.216Z', 'results': [{'generated_text': ' Answer: SSH channel. Question: what is the importance of having the Bash shell available for the user who will execute the build actions? Answer: the user who will execute the build actions must have the Bash shell available. Question', 'generated_token_count': 45, 'input_token_count': 340, 'stop_reason': 'STOP_SEQUENCE', 'seed': 3686780085}]}\n",
514
      "FINAL ANSWER:   SSH channel. : what is the importance of having the Bash shell available for the user who will execute the build actions? the user who will execute the build actions must have the Bash shell available. \n"
515
     ]
516
    }
517
   ],
518
   "source": [
519
    "question = \"the execution of the pipeline is performed through which channel?\"\n",
520
    "\n",
521
    "most_relevant_text = find_most_similar_text(question, full_paras)\n",
522
    "\n",
523
    "chat_history = f\"Answer the question based on the context below. \" + \\\n",
524
    "    \"Context: \"  + most_relevant_text + \\\n",
525
    "    \" Question: \" + question\n",
526
    "\n",
527
    "model_input = chat_history.replace(\"<split>\", \"\\n\")\n",
528
    "\n",
529
    "print(\"INPUT PROMPT: \", model_input)\n",
530
    "\n",
531
    "\n",
532
    "headers = {\n",
533
    "    'Content-Type': 'application/json',\n",
534
    "    'Authorization': bamToken,\n",
535
    "}\n",
536
    "\n",
537
    "json_data = {\n",
538
    "    'model_id': 'bigscience/bloom',    \n",
539
    "    'inputs':  [model_input],\n",
540
    "\n",
541
    "        #bam\n",
542
    "        'parameters': {\n",
543
    "        # \"stream\": \"true\",\n",
544
    "        'temperature': 0.3,\n",
545
    "        'min_new_tokens': 10,\n",
546
    "        'max_new_tokens': 200,\n",
547
    "        'stop_sequences': ['Question']\n",
548
    "    },\n",
549
    "}\n",
550
    "\n",
551
    "#bam\n",
552
    "# response = requests.post('https://bam-api.res.ibm.com/v1/generate', headers=headers, json=json_data)\n",
553
    "\n",
554
    "#Coga\n",
555
    "response = requests.post('https://fmaas-dev-api.bx.cloud9.ibm.com/v1/generate', headers=headers, json=json_data)\n",
556
    "\n",
557
    "\n",
558
    "json_response = json.loads(response.content.decode(\"utf-8\"))\n",
559
    "print(\"BAM OUTPUT: \", json_response)\n",
560
    "\n",
561
    "model_output1 = json_response['results'][0]['generated_text']\n",
562
    "model_output1 = model_output1.replace(\"Question\", '')\n",
563
    "model_output1 = model_output1.replace(\"Answer: \", '')\n",
564
    "\n",
565
    "# Separate sentences\n",
566
    "sentences = model_output1.split(\". \")\n",
567
    "# remove duplicates SENTENCES\n",
568
    "unique_sentences = list( dict.fromkeys(sentences))\n",
569
    "\n",
570
    "if not model_output1.endswith(\".\"):\n",
571
    "# drop the last fragment when the output does not end with a period\n",
572
    "    unique_sentences.pop()\n",
573
    "\n",
574
    "# join unique sentences back into a text \n",
575
    "model_output = \". \".join(unique_sentences)+ \".\"\n",
576
    "\n",
577
    "print(\"FINAL ANSWER: \", model_output1)"
578
   ]
579
  },
580
  {
581
   "cell_type": "code",
582
   "execution_count": 185,
583
   "id": "919ef4d4",
584
   "metadata": {},
585
   "outputs": [
586
    {
587
     "name": "stdout",
588
     "output_type": "stream",
589
     "text": [
590
      "INPUT PROMPT:  Answer the question based on the context below. Context: To change this default behavior of checking Git for every job, specific variables can be set either at the \n",
591
      "pipeline level or at the job level. The latter option is the preferred way for fine -tuning and manage the \n",
592
      "repository checkout granularly.  The GIT_ST RATEGY12 variable controls how the GitLab CI/CD pipeline \n",
593
      "performs the Git checkout: when set to clone , the entire repository will be cloned for this job; when set \n",
594
      "to fetch , only the changes in the Git repository of the project are retrieved. Whe n set to none , no Git \n",
595
      "operation is performed, and the project’s workspace is left untouched.\n",
596
      "\n",
597
      "Integrating IBM z/OS platform in CI/CD pipelines with GitLab   Page 7/22 3 Setting up an initial pipeline with  GitLab CI /CD \n",
598
      "Once the GitLab SSH runner is set up for z/OS, CI /CD pipelines can be executed on Mainframe. The \n",
599
      "definition of pipelines in GitLab CI are done throug h a configuration file, called .gitlab-ci.yml  and stored \n",
600
      "be default at the root level of the project’s repository  (it can be changed in the project’s CI/CD \n",
601
      "configuration) . GitLab provides a complete documentation about keyworks that can be used in pipeline\n",
602
      "\n",
603
      "CI_DEBUG_TRACE:  \"true\" \n",
604
      "For convenience, the extended trace has been activated to facilitate debugging, but it is recommended to \n",
605
      "remove it when the pipeline performs the desired actions correctly.  Please n ote the use of GitLab CI/CD \n",
606
      "variables which are available in the pipeline definition: in this case, the $CI_PROJECT_DIR  variable  contains \n",
607
      "the path where the project is checked out on the target machine, and the $CI_PIPELINE_ID  variable \n",
608
      "contains the unique number associated with the pipeline execution. These variables are defined in the \n",
609
      "GitLab CI/CD documentation8. \n",
610
      "In the GitLab CI /CD section of this project, the execution of this pipeline was manually triggered and \n",
611
      "finished successfully in 2 8 seconds:\n",
612
      "\n",
613
      "Integrating IBM z/OS platform \n",
614
      "in CI/CD pipelines with GitLab\n",
615
      "\n",
616
      "5.1.2 Integratin g Code Review into the GitLab CI/CD pipeline Question: Which variable controls GitLab CI/CD Git checkout?\n",
617
      "BAM OUTPUT:  {'model_id': 'bigscience/bloom', 'created_at': '2023-06-21T09:56:18.612Z', 'results': [{'generated_text': ' Answer: GIT_ST RATEGY12 \\nQuestion', 'generated_token_count': 11, 'input_token_count': 480, 'stop_reason': 'STOP_SEQUENCE', 'seed': 1168129335}]}\n",
618
      "FINAL ANSWER:   GIT_ST RATEGY12 \n",
619
      "\n"
620
     ]
621
    }
622
   ],
623
   "source": [
624
    "question = \"Which variable controls GitLab CI/CD Git checkout?\"\n",
625
    "\n",
626
    "most_relevant_text = find_most_similar_text(question, full_paras)\n",
627
    "\n",
628
    "chat_history = f\"Answer the question based on the context below. \" + \\\n",
629
    "    \"Context: \"  + most_relevant_text + \\\n",
630
    "    \" Question: \" + question\n",
631
    "\n",
632
    "model_input = chat_history.replace(\"<split>\", \"\\n\")\n",
633
    "\n",
634
    "print(\"INPUT PROMPT: \", model_input)\n",
635
    "\n",
636
    "\n",
637
    "headers = {\n",
638
    "    'Content-Type': 'application/json',\n",
639
    "    'Authorization': bamToken,\n",
640
    "}\n",
641
    "\n",
642
    "json_data = {\n",
643
    "    'model_id': 'bigscience/bloom',\n",
644
    "    'inputs':  [model_input],\n",
645
    "        'parameters': {\n",
646
    "        # \"stream\": \"true\",\n",
647
    "        'temperature': 0.3,\n",
648
    "        'min_new_tokens': 10,\n",
649
    "        'max_new_tokens': 200,\n",
650
    "        'stop_sequences': ['Question']\n",
651
    "    },\n",
652
    "}\n",
653
    "\n",
654
    "#bam\n",
655
    "# response = requests.post('https://bam-api.res.ibm.com/v1/generate', headers=headers, json=json_data)\n",
656
    "\n",
657
    "#Coga\n",
658
    "response = requests.post('https://fmaas-dev-api.bx.cloud9.ibm.com/v1/generate', headers=headers, json=json_data)\n",
659
    "\n",
660
    "\n",
661
    "json_response = json.loads(response.content.decode(\"utf-8\"))\n",
662
    "print(\"BAM OUTPUT: \", json_response)\n",
663
    "\n",
664
    "model_output1 = json_response['results'][0]['generated_text']\n",
665
    "model_output1 = model_output1.replace(\"Question\", '')\n",
666
    "model_output1 = model_output1.replace(\"Answer: \", '')\n",
667
    "\n",
668
    "# Separate sentences\n",
669
    "sentences = model_output1.split(\". \")\n",
670
    "# remove duplicates SENTENCES\n",
671
    "unique_sentences = list( dict.fromkeys(sentences))\n",
672
    "\n",
673
    "if not model_output1.endswith(\".\"):\n",
674
    "# drop the last fragment when the output does not end with a period\n",
675
    "    unique_sentences.pop()\n",
676
    "\n",
677
    "# join unique sentences back into a text \n",
678
    "model_output = \". \".join(unique_sentences)+ \".\"\n",
679
    "\n",
680
    "print(\"FINAL ANSWER: \", model_output1)"
681
   ]
682
  },
683
  {
684
   "cell_type": "code",
685
   "execution_count": null,
686
   "id": "a487addb",
687
   "metadata": {},
688
   "outputs": [],
689
   "source": []
690
  }
691
 ],
692
 "metadata": {
693
  "kernelspec": {
694
   "display_name": "Python 3 (ipykernel)",
695
   "language": "python",
696
   "name": "python3"
697
  },
698
  "language_info": {
699
   "codemirror_mode": {
700
    "name": "ipython",
701
    "version": 3
702
   },
703
   "file_extension": ".py",
704
   "mimetype": "text/x-python",
705
   "name": "python",
706
   "nbconvert_exporter": "python",
707
   "pygments_lexer": "ipython3",
708
   "version": "3.10.9"
709
  }
710
 },
711
 "nbformat": 4,
712
 "nbformat_minor": 5
713
}
714
superknowa

Использование cookies