Amazing-Python-Scripts

Форк
0
369 строк · 17.0 Кб
1
{
2
  "nbformat": 4,
3
  "nbformat_minor": 0,
4
  "metadata": {
5
    "colab": {
6
      "provenance": []
7
    },
8
    "kernelspec": {
9
      "name": "python3",
10
      "display_name": "Python 3"
11
    },
12
    "language_info": {
13
      "name": "python"
14
    }
15
  },
16
  "cells": [
17
    {
18
      "cell_type": "markdown",
19
      "source": [
20
        "# Libraries"
21
      ],
22
      "metadata": {
23
        "id": "XmjSOfm5C7Y3"
24
      }
25
    },
26
    {
27
      "cell_type": "code",
28
      "execution_count": null,
29
      "metadata": {
30
        "colab": {
31
          "base_uri": "https://localhost:8080/"
32
        },
33
        "id": "3syypoOe4SZ0",
34
        "outputId": "b319cd48-1f8c-46aa-8e76-721f90fb13b9"
35
      },
36
      "outputs": [
37
        {
38
          "output_type": "stream",
39
          "name": "stdout",
40
          "text": [
41
            "Requirement already satisfied: ktrain in /usr/local/lib/python3.10/dist-packages (0.37.6)\n",
42
            "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.2.2)\n",
43
            "Requirement already satisfied: matplotlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (3.7.1)\n",
44
            "Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.5.3)\n",
45
            "Requirement already satisfied: fastprogress>=0.1.21 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.3)\n",
46
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.27.1)\n",
47
            "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.3.1)\n",
48
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from ktrain) (23.1)\n",
49
            "Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.9)\n",
50
            "Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.42.1)\n",
51
            "Requirement already satisfied: cchardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.1.7)\n",
52
            "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.0.0)\n",
53
            "Requirement already satisfied: syntok>1.3.3 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.4.4)\n",
54
            "Requirement already satisfied: tika in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.6.0)\n",
55
            "Requirement already satisfied: transformers>=4.17.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.31.0)\n",
56
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.1.99)\n",
57
            "Requirement already satisfied: keras-bert>=0.86.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.89.0)\n",
58
            "Requirement already satisfied: whoosh in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.7.4)\n",
59
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (1.22.4)\n",
60
            "Requirement already satisfied: keras-transformer==0.40.0 in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (0.40.0)\n",
61
            "Requirement already satisfied: keras-pos-embd==0.13.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.13.0)\n",
62
            "Requirement already satisfied: keras-multi-head==0.29.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.29.0)\n",
63
            "Requirement already satisfied: keras-layer-normalization==0.16.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.16.0)\n",
64
            "Requirement already satisfied: keras-position-wise-feed-forward==0.8.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.8.0)\n",
65
            "Requirement already satisfied: keras-embed-sim==0.10.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.10.0)\n",
66
            "Requirement already satisfied: keras-self-attention==0.51.0 in /usr/local/lib/python3.10/dist-packages (from keras-multi-head==0.29.0->keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.51.0)\n",
67
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.1.0)\n",
68
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (0.11.0)\n",
69
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (4.41.0)\n",
70
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.4.4)\n",
71
            "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (8.4.0)\n",
72
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (3.1.0)\n",
73
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (2.8.2)\n",
74
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->ktrain) (2022.7.1)\n",
75
            "Requirement already satisfied: regex>2016 in /usr/local/lib/python3.10/dist-packages (from syntok>1.3.3->ktrain) (2022.10.31)\n",
76
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (3.12.2)\n",
77
            "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.16.4)\n",
78
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (6.0.1)\n",
79
            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.13.3)\n",
80
            "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.3.1)\n",
81
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (4.65.0)\n",
82
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect->ktrain) (1.16.0)\n",
83
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (1.26.16)\n",
84
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2023.5.7)\n",
85
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2.0.12)\n",
86
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (3.4)\n",
87
            "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (1.10.1)\n",
88
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (3.2.0)\n",
89
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tika->ktrain) (67.7.2)\n",
90
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (2023.6.0)\n",
91
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (4.7.1)\n"
92
          ]
93
        }
94
      ],
95
      "source": [
96
        "# Pinned to the ktrain release this notebook was last run with; %pip installs into the running kernel's environment.\n",
        "%pip install -q ktrain==0.37.6"
97
      ]
98
    },
99
    {
100
      "cell_type": "code",
101
      "source": [
102
        "import os.path\n",
103
        "import numpy as np\n",
104
        "import ktrain\n",
105
        "from ktrain import text\n",
106
        "import tensorflow"
107
      ],
108
      "metadata": {
109
        "id": "0ZejN0MU6dnb"
110
      },
111
      "execution_count": null,
112
      "outputs": []
113
    },
114
    {
115
      "cell_type": "markdown",
116
      "source": [
117
        "# Dataset"
118
      ],
119
      "metadata": {
120
        "id": "oSJh43dYC_I4"
121
      }
122
    },
123
    {
124
      "cell_type": "code",
125
      "source": [
126
        "# Fetch the IMDB review archive over HTTPS; extract=True also unpacks it.\n",
        "data = tensorflow.keras.utils.get_file(\n",
        "    fname=\"aclImdb_v1.tar.gz\",\n",
        "    origin=\"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
        "    extract=True,\n",
        ")"
127
      ],
128
      "metadata": {
129
        "id": "navTD1Nu7NMH"
130
      },
131
      "execution_count": null,
132
      "outputs": []
133
    },
134
    {
135
      "cell_type": "code",
136
      "source": [
137
        "# Folder named aclImdb next to the downloaded archive (where extraction is expected to place the corpus).\n",
        "dir = os.path.join(os.path.dirname(data), \"aclImdb\")"
138
      ],
139
      "metadata": {
140
        "id": "DJD9_h829wMX"
141
      },
142
      "execution_count": null,
143
      "outputs": []
144
    },
145
    {
146
      "cell_type": "code",
147
      "source": [
148
        "# Read the train/test splits for the pos and neg classes using BERT-style preprocessing.\n",
        "(x_train, y_train), (x_test, y_test), preproc = text.texts_from_folder(\n",
        "    datadir=dir,\n",
        "    classes=[\"pos\", \"neg\"],\n",
        "    train_test_names=[\"train\", \"test\"],\n",
        "    preprocess_mode=\"bert\",\n",
        ")"
149
      ],
150
      "metadata": {
151
        "colab": {
152
          "base_uri": "https://localhost:8080/",
153
          "height": 161
154
        },
155
        "id": "M84oU3gM-1zZ",
156
        "outputId": "4cfe9061-cd3f-4d21-8826-c78853d4e090"
157
      },
158
      "execution_count": null,
159
      "outputs": [
160
        {
161
          "output_type": "stream",
162
          "name": "stdout",
163
          "text": [
164
            "detected encoding: utf-8\n",
165
            "preprocessing train...\n",
166
            "language: en\n"
167
          ]
168
        },
169
        {
170
          "output_type": "display_data",
171
          "data": {
172
            "text/plain": [
173
              "<IPython.core.display.HTML object>"
174
            ],
175
            "text/html": [
176
              "\n",
177
              "<style>\n",
178
              "    /* Turns off some styling */\n",
179
              "    progress {\n",
180
              "        /* gets rid of default border in Firefox and Opera. */\n",
181
              "        border: none;\n",
182
              "        /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
183
              "        background-size: auto;\n",
184
              "    }\n",
185
              "    progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
186
              "        background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
187
              "    }\n",
188
              "    .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
189
              "        background: #F44336;\n",
190
              "    }\n",
191
              "</style>\n"
192
            ]
193
          },
194
          "metadata": {}
195
        },
196
        {
197
          "output_type": "display_data",
198
          "data": {
199
            "text/plain": [
200
              "<IPython.core.display.HTML object>"
201
            ],
202
            "text/html": [
203
              "done."
204
            ]
205
          },
206
          "metadata": {}
207
        },
208
        {
209
          "output_type": "stream",
210
          "name": "stdout",
211
          "text": [
212
            "Is Multi-Label? False\n",
213
            "preprocessing test...\n",
214
            "language: en\n"
215
          ]
216
        },
217
        {
218
          "output_type": "display_data",
219
          "data": {
220
            "text/plain": [
221
              "<IPython.core.display.HTML object>"
222
            ],
223
            "text/html": [
224
              "\n",
225
              "<style>\n",
226
              "    /* Turns off some styling */\n",
227
              "    progress {\n",
228
              "        /* gets rid of default border in Firefox and Opera. */\n",
229
              "        border: none;\n",
230
              "        /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
231
              "        background-size: auto;\n",
232
              "    }\n",
233
              "    progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
234
              "        background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
235
              "    }\n",
236
              "    .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
237
              "        background: #F44336;\n",
238
              "    }\n",
239
              "</style>\n"
240
            ]
241
          },
242
          "metadata": {}
243
        },
244
        {
245
          "output_type": "display_data",
246
          "data": {
247
            "text/plain": [
248
              "<IPython.core.display.HTML object>"
249
            ],
250
            "text/html": [
251
              "done."
252
            ]
253
          },
254
          "metadata": {}
255
        }
256
      ]
257
    },
258
    {
259
      "cell_type": "markdown",
260
      "source": [
261
        "# BERT Model (Bidirectional Encoder Representations from Transformers)"
262
      ],
263
      "metadata": {
264
        "id": "HsD1RIeyDDHi"
265
      }
266
    },
267
    {
268
      "cell_type": "code",
269
      "source": [
270
        "# Instantiate ktrain's \"bert\" text classifier from the training split and its preprocessor.\n",
        "model = text.text_classifier(\n",
        "    name=\"bert\",\n",
        "    train_data=(x_train, y_train),\n",
        "    preproc=preproc,\n",
        ")"
271
      ],
272
      "metadata": {
273
        "id": "egXY63ExDBG9",
274
        "colab": {
275
          "base_uri": "https://localhost:8080/"
276
        },
277
        "outputId": "9fec6679-1aeb-4098-e9d4-57cb869765cd"
278
      },
279
      "execution_count": null,
280
      "outputs": [
281
        {
282
          "output_type": "stream",
283
          "name": "stdout",
284
          "text": [
285
            "Is Multi-Label? False\n",
286
            "maxlen is 400\n"
287
          ]
288
        },
289
        {
290
          "output_type": "stream",
291
          "name": "stderr",
292
          "text": [
293
            "/usr/local/lib/python3.10/dist-packages/keras/initializers/initializers.py:120: UserWarning: The initializer GlorotNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
294
            "  warnings.warn(\n"
295
          ]
296
        },
297
        {
298
          "output_type": "stream",
299
          "name": "stdout",
300
          "text": [
301
            "done.\n"
302
          ]
303
        }
304
      ]
305
    },
306
    {
307
      "cell_type": "code",
308
      "source": [
309
        "# batch_size=6 stays within the limit ktrain reports for 12GB GPUs at sequence lengths up to 512 (maxlen here is 400).\n",
        "a = ktrain.get_learner(\n",
        "    model=model,\n",
        "    train_data=(x_train, y_train),\n",
        "    val_data=(x_test, y_test),\n",
        "    batch_size=6,\n",
        ")"
310
      ],
311
      "metadata": {
312
        "id": "ICtxz7LHaB1I",
313
        "colab": {
314
          "base_uri": "https://localhost:8080/"
315
        },
316
        "outputId": "c3b1c676-3fab-4445-e227-975f6a015e16"
317
      },
318
      "execution_count": null,
319
      "outputs": [
320
        {
321
          "output_type": "stream",
322
          "name": "stderr",
323
          "text": [
324
            "/usr/local/lib/python3.10/dist-packages/ktrain/__init__.py:100: UserWarning: For a GPU with 12GB of RAM, the following maxima apply:\n",
325
            "        sequence len=64, max_batch_size=64\n",
326
            "        sequence len=128, max_batch_size=32\n",
327
            "        sequence len=256, max_batch_size=16\n",
328
            "        sequence len=320, max_batch_size=14\n",
329
            "        sequence len=384, max_batch_size=12\n",
330
            "        sequence len=512, max_batch_size=6\n",
331
            "\n",
332
            "        You've exceeded these limits.\n",
333
            "        If using a GPU with <=12GB of memory, you may run out of memory during training.\n",
334
            "        If necessary, adjust sequence length or batch size based on above.\n",
335
            "  I.warnings.warn(msg)\n"
336
          ]
337
        }
338
      ]
339
    },
340
    {
341
      "cell_type": "code",
342
      "source": [
343
        "# One epoch of one-cycle training with lr=2e-5.\n",
        "a.fit_onecycle(lr=2e-5, epochs=1)"
344
      ],
345
      "metadata": {
346
        "id": "mAjZxMowbr_R",
347
        "colab": {
348
          "base_uri": "https://localhost:8080/",
349
          "height": 171
350
        },
351
        "outputId": "47cc0abe-4083-4cd5-cc8d-6d1ee3e5cb31"
352
      },
353
      "execution_count": null,
354
      "outputs": [
355
        {
356
          "output_type": "error",
357
          "ename": "NameError",
358
          "evalue": "ignored",
359
          "traceback": [
360
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
361
            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
362
            "\u001b[0;32m<ipython-input-1-3c959640d8b7>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_onecycle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2e-5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
363
            "\u001b[0;31mNameError\u001b[0m: name 'a' is not defined"
364
          ]
365
        }
366
      ]
367
    }
368
  ]
369
}

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.