Amazing-Python-Scripts
369 строк · 17.0 Кб
1{
2"nbformat": 4,
3"nbformat_minor": 0,
4"metadata": {
5"colab": {
6"provenance": []
7},
8"kernelspec": {
9"name": "python3",
10"display_name": "Python 3"
11},
12"language_info": {
13"name": "python"
14}
15},
16"cells": [
17{
18"cell_type": "markdown",
19"source": [
20"# Libraries"
21],
22"metadata": {
23"id": "XmjSOfm5C7Y3"
24}
25},
26{
27"cell_type": "code",
28"execution_count": null,
29"metadata": {
30"colab": {
31"base_uri": "https://localhost:8080/"
32},
33"id": "3syypoOe4SZ0",
34"outputId": "b319cd48-1f8c-46aa-8e76-721f90fb13b9"
35},
36"outputs": [
37{
38"output_type": "stream",
39"name": "stdout",
40"text": [
41"Requirement already satisfied: ktrain in /usr/local/lib/python3.10/dist-packages (0.37.6)\n",
42"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.2.2)\n",
43"Requirement already satisfied: matplotlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (3.7.1)\n",
44"Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.5.3)\n",
45"Requirement already satisfied: fastprogress>=0.1.21 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.3)\n",
46"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.27.1)\n",
47"Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.3.1)\n",
48"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from ktrain) (23.1)\n",
49"Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.9)\n",
50"Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.42.1)\n",
51"Requirement already satisfied: cchardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.1.7)\n",
52"Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.0.0)\n",
53"Requirement already satisfied: syntok>1.3.3 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.4.4)\n",
54"Requirement already satisfied: tika in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.6.0)\n",
55"Requirement already satisfied: transformers>=4.17.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.31.0)\n",
56"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.1.99)\n",
57"Requirement already satisfied: keras-bert>=0.86.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.89.0)\n",
58"Requirement already satisfied: whoosh in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.7.4)\n",
59"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (1.22.4)\n",
60"Requirement already satisfied: keras-transformer==0.40.0 in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (0.40.0)\n",
61"Requirement already satisfied: keras-pos-embd==0.13.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.13.0)\n",
62"Requirement already satisfied: keras-multi-head==0.29.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.29.0)\n",
63"Requirement already satisfied: keras-layer-normalization==0.16.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.16.0)\n",
64"Requirement already satisfied: keras-position-wise-feed-forward==0.8.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.8.0)\n",
65"Requirement already satisfied: keras-embed-sim==0.10.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.10.0)\n",
66"Requirement already satisfied: keras-self-attention==0.51.0 in /usr/local/lib/python3.10/dist-packages (from keras-multi-head==0.29.0->keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.51.0)\n",
67"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.1.0)\n",
68"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (0.11.0)\n",
69"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (4.41.0)\n",
70"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.4.4)\n",
71"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (8.4.0)\n",
72"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (3.1.0)\n",
73"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (2.8.2)\n",
74"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->ktrain) (2022.7.1)\n",
75"Requirement already satisfied: regex>2016 in /usr/local/lib/python3.10/dist-packages (from syntok>1.3.3->ktrain) (2022.10.31)\n",
76"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (3.12.2)\n",
77"Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.16.4)\n",
78"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (6.0.1)\n",
79"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.13.3)\n",
80"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.3.1)\n",
81"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (4.65.0)\n",
82"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect->ktrain) (1.16.0)\n",
83"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (1.26.16)\n",
84"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2023.5.7)\n",
85"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2.0.12)\n",
86"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (3.4)\n",
87"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (1.10.1)\n",
88"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (3.2.0)\n",
89"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tika->ktrain) (67.7.2)\n",
90"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (2023.6.0)\n",
91"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (4.7.1)\n"
92]
93}
94],
95"source": [
96"!pip3 install ktrain"
97]
98},
99{
100"cell_type": "code",
101"source": [
102"import os.path\n",
103"import numpy as np\n",
104"import ktrain\n",
105"from ktrain import text\n",
106"import tensorflow"
107],
108"metadata": {
109"id": "0ZejN0MU6dnb"
110},
111"execution_count": null,
112"outputs": []
113},
114{
115"cell_type": "markdown",
116"source": [
117"# Dataset"
118],
119"metadata": {
120"id": "oSJh43dYC_I4"
121}
122},
123{
124"cell_type": "code",
125"source": [
126"data=tensorflow.keras.utils.get_file(fname=\"aclImdb_v1.tar.gz\",origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",extract=True)"
127],
128"metadata": {
129"id": "navTD1Nu7NMH"
130},
131"execution_count": null,
132"outputs": []
133},
134{
135"cell_type": "code",
136"source": [
137"dir=os.path.join(os.path.dirname(data),\"aclImdb\")"
138],
139"metadata": {
140"id": "DJD9_h829wMX"
141},
142"execution_count": null,
143"outputs": []
144},
145{
146"cell_type": "code",
147"source": [
148"(x_train,y_train),(x_test,y_test),preproc=text.texts_from_folder(datadir=dir,classes=[\"pos\",\"neg\"],train_test_names=[\"train\",\"test\"],preprocess_mode=\"bert\")"
149],
150"metadata": {
151"colab": {
152"base_uri": "https://localhost:8080/",
153"height": 161
154},
155"id": "M84oU3gM-1zZ",
156"outputId": "4cfe9061-cd3f-4d21-8826-c78853d4e090"
157},
158"execution_count": null,
159"outputs": [
160{
161"output_type": "stream",
162"name": "stdout",
163"text": [
164"detected encoding: utf-8\n",
165"preprocessing train...\n",
166"language: en\n"
167]
168},
169{
170"output_type": "display_data",
171"data": {
172"text/plain": [
173"<IPython.core.display.HTML object>"
174],
175"text/html": [
176"\n",
177"<style>\n",
178" /* Turns off some styling */\n",
179" progress {\n",
180" /* gets rid of default border in Firefox and Opera. */\n",
181" border: none;\n",
182" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
183" background-size: auto;\n",
184" }\n",
185" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
186" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
187" }\n",
188" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
189" background: #F44336;\n",
190" }\n",
191"</style>\n"
192]
193},
194"metadata": {}
195},
196{
197"output_type": "display_data",
198"data": {
199"text/plain": [
200"<IPython.core.display.HTML object>"
201],
202"text/html": [
203"done."
204]
205},
206"metadata": {}
207},
208{
209"output_type": "stream",
210"name": "stdout",
211"text": [
212"Is Multi-Label? False\n",
213"preprocessing test...\n",
214"language: en\n"
215]
216},
217{
218"output_type": "display_data",
219"data": {
220"text/plain": [
221"<IPython.core.display.HTML object>"
222],
223"text/html": [
224"\n",
225"<style>\n",
226" /* Turns off some styling */\n",
227" progress {\n",
228" /* gets rid of default border in Firefox and Opera. */\n",
229" border: none;\n",
230" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
231" background-size: auto;\n",
232" }\n",
233" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
234" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
235" }\n",
236" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
237" background: #F44336;\n",
238" }\n",
239"</style>\n"
240]
241},
242"metadata": {}
243},
244{
245"output_type": "display_data",
246"data": {
247"text/plain": [
248"<IPython.core.display.HTML object>"
249],
250"text/html": [
251"done."
252]
253},
254"metadata": {}
255}
256]
257},
258{
259"cell_type": "markdown",
260"source": [
261"# BERT Model (Bidirectional Encoder Representations from Transformers)"
262],
263"metadata": {
264"id": "HsD1RIeyDDHi"
265}
266},
267{
268"cell_type": "code",
269"source": [
270"model=text.text_classifier(name=\"bert\",train_data=(x_train,y_train),preproc=preproc)"
271],
272"metadata": {
273"id": "egXY63ExDBG9",
274"colab": {
275"base_uri": "https://localhost:8080/"
276},
277"outputId": "9fec6679-1aeb-4098-e9d4-57cb869765cd"
278},
279"execution_count": null,
280"outputs": [
281{
282"output_type": "stream",
283"name": "stdout",
284"text": [
285"Is Multi-Label? False\n",
286"maxlen is 400\n"
287]
288},
289{
290"output_type": "stream",
291"name": "stderr",
292"text": [
293"/usr/local/lib/python3.10/dist-packages/keras/initializers/initializers.py:120: UserWarning: The initializer GlorotNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n",
294" warnings.warn(\n"
295]
296},
297{
298"output_type": "stream",
299"name": "stdout",
300"text": [
301"done.\n"
302]
303}
304]
305},
306{
307"cell_type": "code",
308"source": [
309"a=ktrain.get_learner(model=model,train_data=(x_train,y_train),val_data=(x_test,y_test),batch_size=32)"
310],
311"metadata": {
312"id": "ICtxz7LHaB1I",
313"colab": {
314"base_uri": "https://localhost:8080/"
315},
316"outputId": "c3b1c676-3fab-4445-e227-975f6a015e16"
317},
318"execution_count": null,
319"outputs": [
320{
321"output_type": "stream",
322"name": "stderr",
323"text": [
324"/usr/local/lib/python3.10/dist-packages/ktrain/__init__.py:100: UserWarning: For a GPU with 12GB of RAM, the following maxima apply:\n",
325" sequence len=64, max_batch_size=64\n",
326" sequence len=128, max_batch_size=32\n",
327" sequence len=256, max_batch_size=16\n",
328" sequence len=320, max_batch_size=14\n",
329" sequence len=384, max_batch_size=12\n",
330" sequence len=512, max_batch_size=6\n",
331"\n",
332" You've exceeded these limits.\n",
333" If using a GPU with <=12GB of memory, you may run out of memory during training.\n",
334" If necessary, adjust sequence length or batch size based on above.\n",
335" I.warnings.warn(msg)\n"
336]
337}
338]
339},
340{
341"cell_type": "code",
342"source": [
343"a.fit_onecycle(lr=2e-5,epochs=1)"
344],
345"metadata": {
346"id": "mAjZxMowbr_R",
347"colab": {
348"base_uri": "https://localhost:8080/",
349"height": 171
350},
351"outputId": "47cc0abe-4083-4cd5-cc8d-6d1ee3e5cb31"
352},
353"execution_count": null,
354"outputs": [
355{
356"output_type": "error",
357"ename": "NameError",
358"evalue": "ignored",
359"traceback": [
360"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
361"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
362"\u001b[0;32m<ipython-input-1-3c959640d8b7>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_onecycle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2e-5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
363"\u001b[0;31mNameError\u001b[0m: name 'a' is not defined"
364]
365}
366]
367}
368]
369}