python_for_analytics

Форк
0
/
2.1_LEC_numpy_and_Word2vec.ipynb 
921 строка · 65.9 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "metadata": {},
6
   "source": [
7
    "# Немного про скорость\n",
8
    "- numpy\n",
9
    "- python\n",
10
    "- pandas"
11
   ]
12
  },
13
  {
14
   "cell_type": "code",
15
   "execution_count": 1,
16
   "metadata": {},
17
   "outputs": [],
18
   "source": [
19
    "import numpy as np\n",
20
    "from random import random"
21
   ]
22
  },
23
  {
24
   "cell_type": "markdown",
25
   "metadata": {},
26
   "source": [
27
    "### Задача 1\n",
28
    "Просто посчитать сумму чисел"
29
   ]
30
  },
31
  {
32
   "cell_type": "code",
33
   "execution_count": 2,
34
   "metadata": {},
35
   "outputs": [],
36
   "source": [
37
    "data = list(range(10**6))"
38
   ]
39
  },
40
  {
41
   "cell_type": "code",
42
   "execution_count": 3,
43
   "metadata": {},
44
   "outputs": [
45
    {
46
     "data": {
47
      "text/plain": [
48
       "[0, 1, 2, 3, 4]"
49
      ]
50
     },
51
     "execution_count": 3,
52
     "metadata": {},
53
     "output_type": "execute_result"
54
    }
55
   ],
56
   "source": [
57
    "data[:5]"
58
   ]
59
  },
60
  {
61
   "cell_type": "markdown",
62
   "metadata": {},
63
   "source": [
64
    "Однократное вычисление"
65
   ]
66
  },
67
  {
68
   "cell_type": "code",
69
   "execution_count": 4,
70
   "metadata": {
71
    "scrolled": true
72
   },
73
   "outputs": [
74
    {
75
     "name": "stdout",
76
     "output_type": "stream",
77
     "text": [
78
      "CPU times: user 97.1 ms, sys: 2.04 ms, total: 99.1 ms\n",
79
      "Wall time: 97.8 ms\n"
80
     ]
81
    },
82
    {
83
     "data": {
84
      "text/plain": [
85
       "499999500000"
86
      ]
87
     },
88
     "execution_count": 4,
89
     "metadata": {},
90
     "output_type": "execute_result"
91
    }
92
   ],
93
   "source": [
94
    "%%time\n",
95
    "\n",
96
    "total_sum = 0\n",
97
    "\n",
98
    "for num in data:\n",
99
    "    total_sum += num\n",
100
    "    \n",
101
    "total_sum"
102
   ]
103
  },
104
  {
105
   "cell_type": "code",
106
   "execution_count": 5,
107
   "metadata": {
108
    "scrolled": true
109
   },
110
   "outputs": [
111
    {
112
     "name": "stdout",
113
     "output_type": "stream",
114
     "text": [
115
      "CPU times: user 5.85 ms, sys: 58 µs, total: 5.9 ms\n",
116
      "Wall time: 5.92 ms\n"
117
     ]
118
    },
119
    {
120
     "data": {
121
      "text/plain": [
122
       "499999500000"
123
      ]
124
     },
125
     "execution_count": 5,
126
     "metadata": {},
127
     "output_type": "execute_result"
128
    }
129
   ],
130
   "source": [
131
    "%%time\n",
132
    "\n",
133
    "sum(data)"
134
   ]
135
  },
136
  {
137
   "cell_type": "markdown",
138
   "metadata": {},
139
   "source": [
140
    "Тест скорости"
141
   ]
142
  },
143
  {
144
   "cell_type": "code",
145
   "execution_count": 6,
146
   "metadata": {
147
    "scrolled": false
148
   },
149
   "outputs": [
150
    {
151
     "name": "stdout",
152
     "output_type": "stream",
153
     "text": [
154
      "CPU times: user 5.49 s, sys: 24.4 ms, total: 5.52 s\n",
155
      "Wall time: 5.55 s\n"
156
     ]
157
    }
158
   ],
159
   "source": [
160
    "%%time\n",
161
    "\n",
162
    "for _ in range(10**3):  # _ - это намек, что переменная в цикле НЕ используется\n",
163
    "    result = sum(data)"
164
   ]
165
  },
166
  {
167
   "cell_type": "code",
168
   "execution_count": 11,
169
   "metadata": {},
170
   "outputs": [
171
    {
172
     "data": {
173
      "text/plain": [
174
       "677"
175
      ]
176
     },
177
     "execution_count": 11,
178
     "metadata": {},
179
     "output_type": "execute_result"
180
    }
181
   ],
182
   "source": [
183
    "_"
184
   ]
185
  },
186
  {
187
   "cell_type": "markdown",
188
   "metadata": {},
189
   "source": [
190
    "Еще вариант"
191
   ]
192
  },
193
  {
194
   "cell_type": "code",
195
   "execution_count": 7,
196
   "metadata": {},
197
   "outputs": [
198
    {
199
     "name": "stdout",
200
     "output_type": "stream",
201
     "text": [
202
      "5.49 ms ± 111 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
203
     ]
204
    }
205
   ],
206
   "source": [
207
    "%%timeit\n",
208
    "\n",
209
    "sum(data)"
210
   ]
211
  },
212
  {
213
   "cell_type": "markdown",
214
   "metadata": {},
215
   "source": [
216
    "Пробуем с массивом numpy"
217
   ]
218
  },
219
  {
220
   "cell_type": "code",
221
   "execution_count": 8,
222
   "metadata": {},
223
   "outputs": [
224
    {
225
     "data": {
226
      "text/plain": [
227
       "array([0, 1, 2, 3, 4])"
228
      ]
229
     },
230
     "execution_count": 8,
231
     "metadata": {},
232
     "output_type": "execute_result"
233
    }
234
   ],
235
   "source": [
236
    "data = np.arange(10**6)\n",
237
    "data[:5]"
238
   ]
239
  },
240
  {
241
   "cell_type": "markdown",
242
   "metadata": {},
243
   "source": [
244
    "# Профилировщик\n",
245
    "- PyCharm\n",
246
    "- VS Code"
247
   ]
248
  },
249
  {
250
   "cell_type": "code",
251
   "execution_count": 9,
252
   "metadata": {
253
    "scrolled": true
254
   },
255
   "outputs": [
256
    {
257
     "name": "stdout",
258
     "output_type": "stream",
259
     "text": [
260
      "CPU times: user 545 ms, sys: 3.46 ms, total: 549 ms\n",
261
      "Wall time: 549 ms\n"
262
     ]
263
    }
264
   ],
265
   "source": [
266
    "%%time\n",
267
    "\n",
268
    "for _ in range(10**3):\n",
269
    "    result = np.sum(data)"
270
   ]
271
  },
272
  {
273
   "cell_type": "code",
274
   "execution_count": 10,
275
   "metadata": {},
276
   "outputs": [
277
    {
278
     "ename": "KeyboardInterrupt",
279
     "evalue": "",
280
     "output_type": "error",
281
     "traceback": [
282
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
283
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
284
      "\u001b[0;32m<timed exec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n",
285
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
286
     ]
287
    }
288
   ],
289
   "source": [
290
    "%%time\n",
291
    "\n",
292
    "for _ in range(10**3):\n",
293
    "    result = sum(data)"
294
   ]
295
  },
296
  {
297
   "attachments": {
298
    "rope.png": {
299
     "image/png": ""
300
    }
301
   },
302
   "cell_type": "markdown",
303
   "metadata": {},
304
   "source": [
305
    "### Задача 2\n",
306
    "Веревку длиной 1 метр разрезают в двух случайных местах (точки разреза выбираются независимо и равномерно по всей длине). С какой вероятностью длина самого большого отрезка окажется более 50 сантиметров?\n",
307
    "![rope.png](attachment:rope.png)"
308
   ]
309
  },
310
  {
311
   "cell_type": "code",
312
   "execution_count": 14,
313
   "metadata": {},
314
   "outputs": [],
315
   "source": [
316
    "from random import random"
317
   ]
318
  },
319
  {
320
   "cell_type": "code",
321
   "execution_count": 22,
322
   "metadata": {},
323
   "outputs": [
324
    {
325
     "data": {
326
      "text/plain": [
327
       "(0.5976711992032095, 0.9661842897029447)"
328
      ]
329
     },
330
     "execution_count": 22,
331
     "metadata": {},
332
     "output_type": "execute_result"
333
    }
334
   ],
335
   "source": [
336
    "first = random()\n",
337
    "second = random()\n",
338
    "\n",
339
    "first, second"
340
   ]
341
  },
342
  {
343
   "cell_type": "code",
344
   "execution_count": null,
345
   "metadata": {},
346
   "outputs": [],
347
   "source": [
348
    "left_cut = min(first, second)\n",
349
    "right_cut = max(first, second)"
350
   ]
351
  },
352
  {
353
   "cell_type": "code",
354
   "execution_count": null,
355
   "metadata": {},
356
   "outputs": [],
357
   "source": [
358
    "if left_cut > 0.5\n",
359
    "if right_cut - left_cut > 0.5\n",
360
    "if right_cut < 0.5"
361
   ]
362
  },
363
  {
364
   "cell_type": "code",
365
   "execution_count": null,
366
   "metadata": {},
367
   "outputs": [],
368
   "source": []
369
  },
370
  {
371
   "cell_type": "code",
372
   "execution_count": 32,
373
   "metadata": {
374
    "scrolled": true
375
   },
376
   "outputs": [
377
    {
378
     "name": "stdout",
379
     "output_type": "stream",
380
     "text": [
381
      "0.7501787\n",
382
      "CPU times: user 5.91 s, sys: 35.4 ms, total: 5.95 s\n",
383
      "Wall time: 5.96 s\n"
384
     ]
385
    }
386
   ],
387
   "source": [
388
    "%%time\n",
389
    "\n",
390
    "N_ATTEMPTS = 10**7\n",
391
    "success_count = 0\n",
392
    "\n",
393
    "for _ in range(N_ATTEMPTS):\n",
394
    "    first_cut = random()\n",
395
    "    second_cut = random()\n",
396
    "    \n",
397
    "    left_cut = min(first_cut, second_cut)  # позиция левого надреза\n",
398
    "    right_cut = max(first_cut, second_cut)  # позиция правого надреза\n",
399
    "    \n",
400
    "    if (\n",
401
    "        left_cut >= 0.5 or  # левый кусок больше 50см\n",
402
    "        right_cut - left_cut >= 0.5 or  # кусок между надрезами больше 50см\n",
403
    "        1-right_cut >= 0.5  # правый кусок больше 50см\n",
404
    "    ):\n",
405
    "        success_count += 1\n",
406
    "        \n",
407
    "print(success_count / N_ATTEMPTS)"
408
   ]
409
  },
410
  {
411
   "cell_type": "markdown",
412
   "metadata": {},
413
   "source": [
414
    "Пробуем с помощью numpy"
415
   ]
416
  },
417
  {
418
   "cell_type": "code",
419
   "execution_count": 24,
420
   "metadata": {},
421
   "outputs": [
422
    {
423
     "data": {
424
      "text/plain": [
425
       "array([0.75778293, 0.8903409 , 0.37658031, 0.77807423, 0.58180176,\n",
426
       "       0.71740072, 0.27533437, 0.76562725, 0.57958218, 0.77943997])"
427
      ]
428
     },
429
     "execution_count": 24,
430
     "metadata": {},
431
     "output_type": "execute_result"
432
    }
433
   ],
434
   "source": [
435
    "np.random.random(10)"
436
   ]
437
  },
438
  {
439
   "cell_type": "code",
440
   "execution_count": 28,
441
   "metadata": {},
442
   "outputs": [
443
    {
444
     "data": {
445
      "text/plain": [
446
       "array([0.13294801, 0.77226362, 0.85692282, 0.29534272, 0.36666038,\n",
447
       "       0.37564865, 0.97125546, 0.43806988, 0.29694839, 0.83192201])"
448
      ]
449
     },
450
     "execution_count": 28,
451
     "metadata": {},
452
     "output_type": "execute_result"
453
    }
454
   ],
455
   "source": [
456
    "first_cut = np.random.random(10)\n",
457
    "first_cut"
458
   ]
459
  },
460
  {
461
   "cell_type": "code",
462
   "execution_count": 29,
463
   "metadata": {},
464
   "outputs": [
465
    {
466
     "data": {
467
      "text/plain": [
468
       "array([0, 1, 1, 0, 0, 0, 1, 0, 0, 1])"
469
      ]
470
     },
471
     "execution_count": 29,
472
     "metadata": {},
473
     "output_type": "execute_result"
474
    }
475
   ],
476
   "source": [
477
    "# np.where(условие, значение_если_условие_выполнено, значение_если_условие_НЕ_выполнено)\n",
478
    "\n",
479
    "np.where(first_cut > 0.5, 1, 0)"
480
   ]
481
  },
482
  {
483
   "cell_type": "code",
484
   "execution_count": 26,
485
   "metadata": {},
486
   "outputs": [],
487
   "source": [
488
    "first_cut = np.random.random(10**7)"
489
   ]
490
  },
491
  {
492
   "cell_type": "code",
493
   "execution_count": 27,
494
   "metadata": {},
495
   "outputs": [
496
    {
497
     "data": {
498
      "text/plain": [
499
       "80000096"
500
      ]
501
     },
502
     "execution_count": 27,
503
     "metadata": {},
504
     "output_type": "execute_result"
505
    }
506
   ],
507
   "source": [
508
    "first_cut.__sizeof__()"
509
   ]
510
  },
511
  {
512
   "cell_type": "code",
513
   "execution_count": 30,
514
   "metadata": {},
515
   "outputs": [
516
    {
517
     "name": "stdout",
518
     "output_type": "stream",
519
     "text": [
520
      "CPU times: user 1.6 s, sys: 90.3 ms, total: 1.69 s\n",
521
      "Wall time: 1.69 s\n"
522
     ]
523
    },
524
    {
525
     "data": {
526
      "text/plain": [
527
       "0.7499787"
528
      ]
529
     },
530
     "execution_count": 30,
531
     "metadata": {},
532
     "output_type": "execute_result"
533
    }
534
   ],
535
   "source": [
536
    "%%time\n",
537
    "\n",
538
    "first_cut = np.random.random(10**7)\n",
539
    "second_cut = np.random.random(10**7)\n",
540
    "\n",
541
    "sum(np.where(\n",
542
    "    ((first_cut < 0.5) & (second_cut < 0.5)) |   # | or, & and\n",
543
    "    ((first_cut > 0.5) & (second_cut > 0.5)) |\n",
544
    "    (abs(first_cut - second_cut) > 0.5), \n",
545
    "    1, 0)) / 10**7"
546
   ]
547
  },
548
  {
549
   "cell_type": "code",
550
   "execution_count": null,
551
   "metadata": {},
552
   "outputs": [],
553
   "source": [
554
    "# multiprocessing, threading.Thread - процессы и потоки\n",
555
    "# asyncio - асинхронные вычисления"
556
   ]
557
  },
558
  {
559
   "cell_type": "code",
560
   "execution_count": null,
561
   "metadata": {},
562
   "outputs": [],
563
   "source": []
564
  },
565
  {
566
   "cell_type": "code",
567
   "execution_count": null,
568
   "metadata": {},
569
   "outputs": [],
570
   "source": []
571
  },
572
  {
573
   "cell_type": "markdown",
574
   "metadata": {},
575
   "source": [
576
    "# Метрики схожести текстов"
577
   ]
578
  },
579
  {
580
   "cell_type": "markdown",
581
   "metadata": {},
582
   "source": [
583
    "Расстояние Хемминга — число позиций, в которых символы двух строк различаются (определено только для строк одинаковой длины)"
584
   ]
585
  },
586
  {
587
   "cell_type": "code",
588
   "execution_count": 33,
589
   "metadata": {},
590
   "outputs": [],
591
   "source": [
592
    "signal_1 = '010010100101010101101011101010100101'\n",
593
    "signal_2 = '010110100101011101110011101010100101'"
594
   ]
595
  },
596
  {
597
   "cell_type": "code",
598
   "execution_count": 34,
599
   "metadata": {},
600
   "outputs": [
601
    {
602
     "name": "stdout",
603
     "output_type": "stream",
604
     "text": [
605
      "4\n"
606
     ]
607
    }
608
   ],
609
   "source": [
610
    "hamming_dist = 0\n",
611
    "\n",
612
    "for i, number in enumerate(signal_1):\n",
613
    "    if number != signal_2[i]:\n",
614
    "        hamming_dist += 1\n",
615
    "        \n",
616
    "print(hamming_dist)"
617
   ]
618
  },
619
  {
620
   "cell_type": "markdown",
621
   "metadata": {},
622
   "source": [
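623
    "Расстояние Левенштейна — минимальное число вставок, удалений и замен одного символа, необходимых, чтобы превратить одну строку в другую"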
624
   ]
625
  },
626
  {
627
   "cell_type": "code",
628
   "execution_count": 35,
629
   "metadata": {},
630
   "outputs": [],
631
   "source": [
632
    "import Levenshtein"
633
   ]
634
  },
635
  {
636
   "cell_type": "code",
637
   "execution_count": 36,
638
   "metadata": {},
639
   "outputs": [
640
    {
641
     "data": {
642
      "text/plain": [
643
       "4"
644
      ]
645
     },
646
     "execution_count": 36,
647
     "metadata": {},
648
     "output_type": "execute_result"
649
    }
650
   ],
651
   "source": [
652
    "Levenshtein.distance('иванов', 'петров')"
653
   ]
654
  },
655
  {
656
   "cell_type": "code",
657
   "execution_count": 37,
658
   "metadata": {},
659
   "outputs": [],
660
   "source": [
661
    "voice_transcription = 'Романова'  # результат расшифровки голоса"
662
   ]
663
  },
664
  {
665
   "cell_type": "code",
666
   "execution_count": 38,
667
   "metadata": {},
668
   "outputs": [],
669
   "source": [
670
    "team = ['Мединская', 'Лучникова', 'Шереметьева', 'Разгуляева', 'Романовская']  # список настоящих фамилий"
671
   ]
672
  },
673
  {
674
   "cell_type": "code",
675
   "execution_count": 39,
676
   "metadata": {},
677
   "outputs": [
678
    {
679
     "data": {
680
      "text/plain": [
681
       "[('Романовская', 3),\n",
682
       " ('Лучникова', 6),\n",
683
       " ('Мединская', 7),\n",
684
       " ('Разгуляева', 7),\n",
685
       " ('Шереметьева', 8)]"
686
      ]
687
     },
688
     "execution_count": 39,
689
     "metadata": {},
690
     "output_type": "execute_result"
691
    }
692
   ],
693
   "source": [
694
    "sorted([(name, Levenshtein.distance(name, voice_transcription)) for name in team], key=lambda x: x[1])"
695
   ]
696
  },
697
  {
698
   "cell_type": "code",
699
   "execution_count": null,
700
   "metadata": {},
701
   "outputs": [],
702
   "source": []
703
  },
704
  {
705
   "cell_type": "code",
706
   "execution_count": null,
707
   "metadata": {},
708
   "outputs": [],
709
   "source": []
710
  },
711
  {
712
   "cell_type": "markdown",
713
   "metadata": {},
714
   "source": [
715
    "# Word2vec"
716
   ]
717
  },
718
  {
719
   "cell_type": "markdown",
720
   "metadata": {},
721
   "source": [
722
    "- google --> пщщпду (то же слово, набранное в русской раскладке клавиатуры)\n",
723
    "- однушка --> однокомнатная квартира\n",
724
    "\n",
725
    "[Пример](https://ai.intelligentonlinetools.com/ml/k-means-clustering-example-word2vec/) использования"
726
   ]
727
  },
728
  {
729
   "cell_type": "code",
730
   "execution_count": 40,
731
   "metadata": {},
732
   "outputs": [],
733
   "source": [
734
    "import word2vec"
735
   ]
736
  },
737
  {
738
   "cell_type": "code",
739
   "execution_count": null,
740
   "metadata": {
741
    "scrolled": true
742
   },
743
   "outputs": [],
744
   "source": [
745
    "# построение модели\n",
746
    "# word2vec.word2vec('keywords_only.csv', 'keywords.bin', size=100, binary=True, verbose=True)"
747
   ]
748
  },
749
  {
750
   "cell_type": "code",
751
   "execution_count": 41,
752
   "metadata": {},
753
   "outputs": [],
754
   "source": [
755
    "model = word2vec.load('keywords.bin')"
756
   ]
757
  },
758
  {
759
   "cell_type": "code",
760
   "execution_count": 42,
761
   "metadata": {
762
    "scrolled": true
763
   },
764
   "outputs": [
765
    {
766
     "data": {
767
      "text/plain": [
768
       "(5110, 100)"
769
      ]
770
     },
771
     "execution_count": 42,
772
     "metadata": {},
773
     "output_type": "execute_result"
774
    }
775
   ],
776
   "source": [
777
    "model.vectors.shape"
778
   ]
779
  },
780
  {
781
   "cell_type": "code",
782
   "execution_count": 43,
783
   "metadata": {},
784
   "outputs": [
785
    {
786
     "data": {
787
      "text/plain": [
788
       "[('доллар', 'курс', 0.9418648979417067),\n",
789
       " ('доллар', 'новости', 0.961252865976364),\n",
790
       " ('доллар', 'exist', 0.7393588306142083),\n",
791
       " ('курс', 'новости', 0.9320686871264429),\n",
792
       " ('курс', 'exist', 0.8172263099993266),\n",
793
       " ('новости', 'exist', 0.7475044615162911)]"
794
      ]
795
     },
796
     "execution_count": 43,
797
     "metadata": {},
798
     "output_type": "execute_result"
799
    }
800
   ],
801
   "source": [
802
    "model.distance('доллар', 'курс', 'новости', 'exist')"
803
   ]
804
  },
805
  {
806
   "cell_type": "code",
807
   "execution_count": 44,
808
   "metadata": {
809
    "scrolled": true
810
   },
811
   "outputs": [
812
    {
813
     "data": {
814
      "text/plain": [
815
       "[('фейсбук', 0.9973144427134036),\n",
816
       " ('реклама', 0.9967488335032968),\n",
817
       " ('контакт', 0.9937352354865503),\n",
818
       " ('gmail', 0.9913932694292359),\n",
819
       " ('одноклассники', 0.9911915278087111),\n",
820
       " ('мою', 0.988450608859537),\n",
821
       " ('odnoklassniki', 0.9882130270998799),\n",
822
       " ('майл', 0.9866659658020683),\n",
823
       " ('госуслуги', 0.9865898596239959),\n",
824
       " ('vk', 0.985478288550912)]"
825
      ]
826
     },
827
     "execution_count": 44,
828
     "metadata": {},
829
     "output_type": "execute_result"
830
    }
831
   ],
832
   "source": [
833
    "indexes, metrics = model.similar('вконтакте')\n",
834
    "model.generate_response(indexes, metrics).tolist()"
835
   ]
836
  },
837
  {
838
   "cell_type": "code",
839
   "execution_count": 45,
840
   "metadata": {},
841
   "outputs": [
842
    {
843
     "data": {
844
      "text/plain": [
845
       "[('дикого', 0.9660463414472039),\n",
846
       " ('дочь', 0.9654834067319502),\n",
847
       " ('перси', 0.9645626335378448),\n",
848
       " ('запада', 0.9634953006784215),\n",
849
       " ('одна', 0.9626509379556154),\n",
850
       " ('синий', 0.9625325953575207),\n",
851
       " ('папины', 0.9622590861211784),\n",
852
       " ('поли', 0.9617913888840744),\n",
853
       " ('мастер', 0.9613913633302174),\n",
854
       " ('зеленый', 0.9612019929452926)]"
855
      ]
856
     },
857
     "execution_count": 45,
858
     "metadata": {},
859
     "output_type": "execute_result"
860
    }
861
   ],
862
   "source": [
863
    "indexes, metrics = model.similar('замок')\n",
864
    "model.generate_response(indexes, metrics).tolist()"
865
   ]
866
  },
867
  {
868
   "cell_type": "code",
869
   "execution_count": 46,
870
   "metadata": {
871
    "scrolled": true
872
   },
873
   "outputs": [
874
    {
875
     "data": {
876
      "text/plain": [
877
       "[('иллюзия', 0.9746920189283443),\n",
878
       " ('обмана', 0.9739505786654822),\n",
879
       " ('союзники', 0.9726426792215386),\n",
880
       " ('механик', 0.9708741631776523),\n",
881
       " ('возвращайся', 0.9658919454200058),\n",
882
       " ('воскрешение', 0.9658354453664477),\n",
883
       " ('кредо', 0.9636937079052321),\n",
884
       " ('омерзительная', 0.9615733713655767),\n",
885
       " ('всех', 0.9599216386354856),\n",
886
       " ('серого', 0.9594825853024929)]"
887
      ]
888
     },
889
     "execution_count": 46,
890
     "metadata": {},
891
     "output_type": "execute_result"
892
    }
893
   ],
894
   "source": [
895
    "indexes, metrics = model.similar('фильм')\n",
896
    "model.generate_response(indexes, metrics).tolist()"
897
   ]
898
  }
899
 ],
900
 "metadata": {
901
  "kernelspec": {
902
   "display_name": "Python 3 (ipykernel)",
903
   "language": "python",
904
   "name": "python3"
905
  },
906
  "language_info": {
907
   "codemirror_mode": {
908
    "name": "ipython",
909
    "version": 3
910
   },
911
   "file_extension": ".py",
912
   "mimetype": "text/x-python",
913
   "name": "python",
914
   "nbconvert_exporter": "python",
915
   "pygments_lexer": "ipython3",
916
   "version": "3.9.13"
917
  }
918
 },
919
 "nbformat": 4,
920
 "nbformat_minor": 4
921
}
922

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.