python_for_analytics

Форк
0
/
3.5_HW_classification_and_clustering.ipynb 
808 строк · 114.6 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "id": "ae58ad99",
6
   "metadata": {},
7
   "source": [
8
    "<h1 align=\"center\">3.5 Задачи классификации и кластеризации</h1>\n",
9
    "\n",
10
    "Задание:\n",
11
    "- Возьмите датасет с цветками iris’а (функция load_iris из библиотеки sklearn)\n",
12
    "- Оставьте два признака - sepal_length и sepal_width и целевую переменную - variety\n",
13
    "- Разделите данные на выборку для обучения и тестирования\n",
14
    "- Постройте модель LDA\n",
15
    "- Визуализируйте предсказания для тестовой выборки и центры классов\n",
16
    "- Отбросьте целевую переменную и оставьте только два признака - sepal_length и sepal_width\n",
17
    "- Подберите оптимальное число кластеров для алгоритма kmeans и визуализируйте полученную кластеризацию"
18
   ]
19
  },
20
  {
21
   "cell_type": "code",
22
   "execution_count": null,
23
   "id": "227bd386",
24
   "metadata": {},
25
   "outputs": [],
26
   "source": [
27
    "import pandas as pd\n",
28
    "import matplotlib.pyplot as plt\n",
29
    "\n",
30
    "from sklearn.datasets import load_iris\n",
31
    "from sklearn.model_selection import train_test_split\n",
32
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
33
    "from sklearn.metrics import accuracy_score\n",
34
    "from sklearn.cluster import KMeans"
35
   ]
36
  },
37
  {
38
   "cell_type": "code",
39
   "execution_count": 2,
40
   "id": "69c6c974",
41
   "metadata": {},
42
   "outputs": [],
43
   "source": [
44
    "iris = load_iris()"
45
   ]
46
  },
47
  {
48
   "cell_type": "code",
49
   "execution_count": 3,
50
   "id": "c82f8df0",
51
   "metadata": {},
52
   "outputs": [
53
    {
54
     "data": {
55
      "text/html": [
56
       "<div>\n",
57
       "<style scoped>\n",
58
       "    .dataframe tbody tr th:only-of-type {\n",
59
       "        vertical-align: middle;\n",
60
       "    }\n",
61
       "\n",
62
       "    .dataframe tbody tr th {\n",
63
       "        vertical-align: top;\n",
64
       "    }\n",
65
       "\n",
66
       "    .dataframe thead th {\n",
67
       "        text-align: right;\n",
68
       "    }\n",
69
       "</style>\n",
70
       "<table border=\"1\" class=\"dataframe\">\n",
71
       "  <thead>\n",
72
       "    <tr style=\"text-align: right;\">\n",
73
       "      <th></th>\n",
74
       "      <th>sepal length (cm)</th>\n",
75
       "      <th>sepal width (cm)</th>\n",
76
       "      <th>petal length (cm)</th>\n",
77
       "      <th>petal width (cm)</th>\n",
78
       "      <th>variety</th>\n",
79
       "    </tr>\n",
80
       "  </thead>\n",
81
       "  <tbody>\n",
82
       "    <tr>\n",
83
       "      <th>0</th>\n",
84
       "      <td>5.1</td>\n",
85
       "      <td>3.5</td>\n",
86
       "      <td>1.4</td>\n",
87
       "      <td>0.2</td>\n",
88
       "      <td>0</td>\n",
89
       "    </tr>\n",
90
       "    <tr>\n",
91
       "      <th>1</th>\n",
92
       "      <td>4.9</td>\n",
93
       "      <td>3.0</td>\n",
94
       "      <td>1.4</td>\n",
95
       "      <td>0.2</td>\n",
96
       "      <td>0</td>\n",
97
       "    </tr>\n",
98
       "    <tr>\n",
99
       "      <th>2</th>\n",
100
       "      <td>4.7</td>\n",
101
       "      <td>3.2</td>\n",
102
       "      <td>1.3</td>\n",
103
       "      <td>0.2</td>\n",
104
       "      <td>0</td>\n",
105
       "    </tr>\n",
106
       "    <tr>\n",
107
       "      <th>3</th>\n",
108
       "      <td>4.6</td>\n",
109
       "      <td>3.1</td>\n",
110
       "      <td>1.5</td>\n",
111
       "      <td>0.2</td>\n",
112
       "      <td>0</td>\n",
113
       "    </tr>\n",
114
       "    <tr>\n",
115
       "      <th>4</th>\n",
116
       "      <td>5.0</td>\n",
117
       "      <td>3.6</td>\n",
118
       "      <td>1.4</td>\n",
119
       "      <td>0.2</td>\n",
120
       "      <td>0</td>\n",
121
       "    </tr>\n",
122
       "    <tr>\n",
123
       "      <th>...</th>\n",
124
       "      <td>...</td>\n",
125
       "      <td>...</td>\n",
126
       "      <td>...</td>\n",
127
       "      <td>...</td>\n",
128
       "      <td>...</td>\n",
129
       "    </tr>\n",
130
       "    <tr>\n",
131
       "      <th>145</th>\n",
132
       "      <td>6.7</td>\n",
133
       "      <td>3.0</td>\n",
134
       "      <td>5.2</td>\n",
135
       "      <td>2.3</td>\n",
136
       "      <td>2</td>\n",
137
       "    </tr>\n",
138
       "    <tr>\n",
139
       "      <th>146</th>\n",
140
       "      <td>6.3</td>\n",
141
       "      <td>2.5</td>\n",
142
       "      <td>5.0</td>\n",
143
       "      <td>1.9</td>\n",
144
       "      <td>2</td>\n",
145
       "    </tr>\n",
146
       "    <tr>\n",
147
       "      <th>147</th>\n",
148
       "      <td>6.5</td>\n",
149
       "      <td>3.0</td>\n",
150
       "      <td>5.2</td>\n",
151
       "      <td>2.0</td>\n",
152
       "      <td>2</td>\n",
153
       "    </tr>\n",
154
       "    <tr>\n",
155
       "      <th>148</th>\n",
156
       "      <td>6.2</td>\n",
157
       "      <td>3.4</td>\n",
158
       "      <td>5.4</td>\n",
159
       "      <td>2.3</td>\n",
160
       "      <td>2</td>\n",
161
       "    </tr>\n",
162
       "    <tr>\n",
163
       "      <th>149</th>\n",
164
       "      <td>5.9</td>\n",
165
       "      <td>3.0</td>\n",
166
       "      <td>5.1</td>\n",
167
       "      <td>1.8</td>\n",
168
       "      <td>2</td>\n",
169
       "    </tr>\n",
170
       "  </tbody>\n",
171
       "</table>\n",
172
       "<p>150 rows × 5 columns</p>\n",
173
       "</div>"
174
      ],
175
      "text/plain": [
176
       "     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \\\n",
177
       "0                  5.1               3.5                1.4               0.2   \n",
178
       "1                  4.9               3.0                1.4               0.2   \n",
179
       "2                  4.7               3.2                1.3               0.2   \n",
180
       "3                  4.6               3.1                1.5               0.2   \n",
181
       "4                  5.0               3.6                1.4               0.2   \n",
182
       "..                 ...               ...                ...               ...   \n",
183
       "145                6.7               3.0                5.2               2.3   \n",
184
       "146                6.3               2.5                5.0               1.9   \n",
185
       "147                6.5               3.0                5.2               2.0   \n",
186
       "148                6.2               3.4                5.4               2.3   \n",
187
       "149                5.9               3.0                5.1               1.8   \n",
188
       "\n",
189
       "     variety  \n",
190
       "0          0  \n",
191
       "1          0  \n",
192
       "2          0  \n",
193
       "3          0  \n",
194
       "4          0  \n",
195
       "..       ...  \n",
196
       "145        2  \n",
197
       "146        2  \n",
198
       "147        2  \n",
199
       "148        2  \n",
200
       "149        2  \n",
201
       "\n",
202
       "[150 rows x 5 columns]"
203
      ]
204
     },
205
     "execution_count": 3,
206
     "metadata": {},
207
     "output_type": "execute_result"
208
    }
209
   ],
210
   "source": [
211
    "data = pd.DataFrame(iris.data, columns = iris.feature_names)\n",
212
    "data_class = data['variety'] = iris.target\n",
213
    "data"
214
   ]
215
  },
216
  {
217
   "cell_type": "code",
218
   "execution_count": 4,
219
   "id": "7bc24e97",
220
   "metadata": {},
221
   "outputs": [
222
    {
223
     "data": {
224
      "text/html": [
225
       "<div>\n",
226
       "<style scoped>\n",
227
       "    .dataframe tbody tr th:only-of-type {\n",
228
       "        vertical-align: middle;\n",
229
       "    }\n",
230
       "\n",
231
       "    .dataframe tbody tr th {\n",
232
       "        vertical-align: top;\n",
233
       "    }\n",
234
       "\n",
235
       "    .dataframe thead th {\n",
236
       "        text-align: right;\n",
237
       "    }\n",
238
       "</style>\n",
239
       "<table border=\"1\" class=\"dataframe\">\n",
240
       "  <thead>\n",
241
       "    <tr style=\"text-align: right;\">\n",
242
       "      <th></th>\n",
243
       "      <th>sepal length (cm)</th>\n",
244
       "      <th>sepal width (cm)</th>\n",
245
       "      <th>variety</th>\n",
246
       "    </tr>\n",
247
       "  </thead>\n",
248
       "  <tbody>\n",
249
       "    <tr>\n",
250
       "      <th>0</th>\n",
251
       "      <td>5.1</td>\n",
252
       "      <td>3.5</td>\n",
253
       "      <td>0</td>\n",
254
       "    </tr>\n",
255
       "    <tr>\n",
256
       "      <th>1</th>\n",
257
       "      <td>4.9</td>\n",
258
       "      <td>3.0</td>\n",
259
       "      <td>0</td>\n",
260
       "    </tr>\n",
261
       "    <tr>\n",
262
       "      <th>2</th>\n",
263
       "      <td>4.7</td>\n",
264
       "      <td>3.2</td>\n",
265
       "      <td>0</td>\n",
266
       "    </tr>\n",
267
       "    <tr>\n",
268
       "      <th>3</th>\n",
269
       "      <td>4.6</td>\n",
270
       "      <td>3.1</td>\n",
271
       "      <td>0</td>\n",
272
       "    </tr>\n",
273
       "    <tr>\n",
274
       "      <th>4</th>\n",
275
       "      <td>5.0</td>\n",
276
       "      <td>3.6</td>\n",
277
       "      <td>0</td>\n",
278
       "    </tr>\n",
279
       "    <tr>\n",
280
       "      <th>...</th>\n",
281
       "      <td>...</td>\n",
282
       "      <td>...</td>\n",
283
       "      <td>...</td>\n",
284
       "    </tr>\n",
285
       "    <tr>\n",
286
       "      <th>145</th>\n",
287
       "      <td>6.7</td>\n",
288
       "      <td>3.0</td>\n",
289
       "      <td>2</td>\n",
290
       "    </tr>\n",
291
       "    <tr>\n",
292
       "      <th>146</th>\n",
293
       "      <td>6.3</td>\n",
294
       "      <td>2.5</td>\n",
295
       "      <td>2</td>\n",
296
       "    </tr>\n",
297
       "    <tr>\n",
298
       "      <th>147</th>\n",
299
       "      <td>6.5</td>\n",
300
       "      <td>3.0</td>\n",
301
       "      <td>2</td>\n",
302
       "    </tr>\n",
303
       "    <tr>\n",
304
       "      <th>148</th>\n",
305
       "      <td>6.2</td>\n",
306
       "      <td>3.4</td>\n",
307
       "      <td>2</td>\n",
308
       "    </tr>\n",
309
       "    <tr>\n",
310
       "      <th>149</th>\n",
311
       "      <td>5.9</td>\n",
312
       "      <td>3.0</td>\n",
313
       "      <td>2</td>\n",
314
       "    </tr>\n",
315
       "  </tbody>\n",
316
       "</table>\n",
317
       "<p>150 rows × 3 columns</p>\n",
318
       "</div>"
319
      ],
320
      "text/plain": [
321
       "     sepal length (cm)  sepal width (cm)  variety\n",
322
       "0                  5.1               3.5        0\n",
323
       "1                  4.9               3.0        0\n",
324
       "2                  4.7               3.2        0\n",
325
       "3                  4.6               3.1        0\n",
326
       "4                  5.0               3.6        0\n",
327
       "..                 ...               ...      ...\n",
328
       "145                6.7               3.0        2\n",
329
       "146                6.3               2.5        2\n",
330
       "147                6.5               3.0        2\n",
331
       "148                6.2               3.4        2\n",
332
       "149                5.9               3.0        2\n",
333
       "\n",
334
       "[150 rows x 3 columns]"
335
      ]
336
     },
337
     "execution_count": 4,
338
     "metadata": {},
339
     "output_type": "execute_result"
340
    }
341
   ],
342
   "source": [
343
    "data = data[['sepal length (cm)' , 'sepal width (cm)', 'variety']]\n",
344
    "data"
345
   ]
346
  },
347
  {
348
   "cell_type": "code",
349
   "execution_count": 5,
350
   "id": "26b40370",
351
   "metadata": {},
352
   "outputs": [
353
    {
354
     "data": {
355
      "text/plain": [
356
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
357
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
358
       "       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
359
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
360
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
361
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
362
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])"
363
      ]
364
     },
365
     "execution_count": 5,
366
     "metadata": {},
367
     "output_type": "execute_result"
368
    }
369
   ],
370
   "source": [
371
    "data_class"
372
   ]
373
  },
374
  {
375
   "cell_type": "code",
376
   "execution_count": 6,
377
   "id": "53647fd8",
378
   "metadata": {},
379
   "outputs": [],
380
   "source": [
381
    "# Train on the two sepal features only: 'variety' is the target and must not be part of X.\n",
    "X_train, X_test, y_train, y_test = train_test_split(data[['sepal length (cm)', 'sepal width (cm)']], data_class, test_size=0.25, random_state=80)"
382
   ]
383
  },
384
  {
385
   "cell_type": "code",
386
   "execution_count": 7,
387
   "id": "3e0cda17",
388
   "metadata": {
389
    "collapsed": true
390
   },
391
   "outputs": [
392
    {
393
     "data": {
394
      "text/html": [
395
       "<div>\n",
396
       "<style scoped>\n",
397
       "    .dataframe tbody tr th:only-of-type {\n",
398
       "        vertical-align: middle;\n",
399
       "    }\n",
400
       "\n",
401
       "    .dataframe tbody tr th {\n",
402
       "        vertical-align: top;\n",
403
       "    }\n",
404
       "\n",
405
       "    .dataframe thead th {\n",
406
       "        text-align: right;\n",
407
       "    }\n",
408
       "</style>\n",
409
       "<table border=\"1\" class=\"dataframe\">\n",
410
       "  <thead>\n",
411
       "    <tr style=\"text-align: right;\">\n",
412
       "      <th></th>\n",
413
       "      <th>0</th>\n",
414
       "      <th>1</th>\n",
415
       "    </tr>\n",
416
       "  </thead>\n",
417
       "  <tbody>\n",
418
       "    <tr>\n",
419
       "      <th>0</th>\n",
420
       "      <td>0</td>\n",
421
       "      <td>0</td>\n",
422
       "    </tr>\n",
423
       "    <tr>\n",
424
       "      <th>1</th>\n",
425
       "      <td>0</td>\n",
426
       "      <td>0</td>\n",
427
       "    </tr>\n",
428
       "    <tr>\n",
429
       "      <th>2</th>\n",
430
       "      <td>0</td>\n",
431
       "      <td>0</td>\n",
432
       "    </tr>\n",
433
       "    <tr>\n",
434
       "      <th>3</th>\n",
435
       "      <td>1</td>\n",
436
       "      <td>2</td>\n",
437
       "    </tr>\n",
438
       "    <tr>\n",
439
       "      <th>4</th>\n",
440
       "      <td>0</td>\n",
441
       "      <td>0</td>\n",
442
       "    </tr>\n",
443
       "    <tr>\n",
444
       "      <th>5</th>\n",
445
       "      <td>1</td>\n",
446
       "      <td>1</td>\n",
447
       "    </tr>\n",
448
       "    <tr>\n",
449
       "      <th>6</th>\n",
450
       "      <td>1</td>\n",
451
       "      <td>1</td>\n",
452
       "    </tr>\n",
453
       "    <tr>\n",
454
       "      <th>7</th>\n",
455
       "      <td>0</td>\n",
456
       "      <td>0</td>\n",
457
       "    </tr>\n",
458
       "    <tr>\n",
459
       "      <th>8</th>\n",
460
       "      <td>0</td>\n",
461
       "      <td>0</td>\n",
462
       "    </tr>\n",
463
       "    <tr>\n",
464
       "      <th>9</th>\n",
465
       "      <td>0</td>\n",
466
       "      <td>0</td>\n",
467
       "    </tr>\n",
468
       "    <tr>\n",
469
       "      <th>10</th>\n",
470
       "      <td>2</td>\n",
471
       "      <td>2</td>\n",
472
       "    </tr>\n",
473
       "    <tr>\n",
474
       "      <th>11</th>\n",
475
       "      <td>1</td>\n",
476
       "      <td>1</td>\n",
477
       "    </tr>\n",
478
       "    <tr>\n",
479
       "      <th>12</th>\n",
480
       "      <td>1</td>\n",
481
       "      <td>1</td>\n",
482
       "    </tr>\n",
483
       "    <tr>\n",
484
       "      <th>13</th>\n",
485
       "      <td>0</td>\n",
486
       "      <td>0</td>\n",
487
       "    </tr>\n",
488
       "    <tr>\n",
489
       "      <th>14</th>\n",
490
       "      <td>2</td>\n",
491
       "      <td>2</td>\n",
492
       "    </tr>\n",
493
       "    <tr>\n",
494
       "      <th>15</th>\n",
495
       "      <td>1</td>\n",
496
       "      <td>1</td>\n",
497
       "    </tr>\n",
498
       "    <tr>\n",
499
       "      <th>16</th>\n",
500
       "      <td>0</td>\n",
501
       "      <td>0</td>\n",
502
       "    </tr>\n",
503
       "    <tr>\n",
504
       "      <th>17</th>\n",
505
       "      <td>1</td>\n",
506
       "      <td>1</td>\n",
507
       "    </tr>\n",
508
       "    <tr>\n",
509
       "      <th>18</th>\n",
510
       "      <td>0</td>\n",
511
       "      <td>0</td>\n",
512
       "    </tr>\n",
513
       "    <tr>\n",
514
       "      <th>19</th>\n",
515
       "      <td>0</td>\n",
516
       "      <td>0</td>\n",
517
       "    </tr>\n",
518
       "    <tr>\n",
519
       "      <th>20</th>\n",
520
       "      <td>0</td>\n",
521
       "      <td>0</td>\n",
522
       "    </tr>\n",
523
       "    <tr>\n",
524
       "      <th>21</th>\n",
525
       "      <td>2</td>\n",
526
       "      <td>2</td>\n",
527
       "    </tr>\n",
528
       "    <tr>\n",
529
       "      <th>22</th>\n",
530
       "      <td>2</td>\n",
531
       "      <td>2</td>\n",
532
       "    </tr>\n",
533
       "    <tr>\n",
534
       "      <th>23</th>\n",
535
       "      <td>1</td>\n",
536
       "      <td>1</td>\n",
537
       "    </tr>\n",
538
       "    <tr>\n",
539
       "      <th>24</th>\n",
540
       "      <td>1</td>\n",
541
       "      <td>2</td>\n",
542
       "    </tr>\n",
543
       "    <tr>\n",
544
       "      <th>25</th>\n",
545
       "      <td>0</td>\n",
546
       "      <td>0</td>\n",
547
       "    </tr>\n",
548
       "    <tr>\n",
549
       "      <th>26</th>\n",
550
       "      <td>2</td>\n",
551
       "      <td>1</td>\n",
552
       "    </tr>\n",
553
       "    <tr>\n",
554
       "      <th>27</th>\n",
555
       "      <td>1</td>\n",
556
       "      <td>1</td>\n",
557
       "    </tr>\n",
558
       "    <tr>\n",
559
       "      <th>28</th>\n",
560
       "      <td>1</td>\n",
561
       "      <td>1</td>\n",
562
       "    </tr>\n",
563
       "    <tr>\n",
564
       "      <th>29</th>\n",
565
       "      <td>2</td>\n",
566
       "      <td>2</td>\n",
567
       "    </tr>\n",
568
       "    <tr>\n",
569
       "      <th>30</th>\n",
570
       "      <td>0</td>\n",
571
       "      <td>0</td>\n",
572
       "    </tr>\n",
573
       "    <tr>\n",
574
       "      <th>31</th>\n",
575
       "      <td>1</td>\n",
576
       "      <td>1</td>\n",
577
       "    </tr>\n",
578
       "    <tr>\n",
579
       "      <th>32</th>\n",
580
       "      <td>0</td>\n",
581
       "      <td>0</td>\n",
582
       "    </tr>\n",
583
       "    <tr>\n",
584
       "      <th>33</th>\n",
585
       "      <td>1</td>\n",
586
       "      <td>1</td>\n",
587
       "    </tr>\n",
588
       "    <tr>\n",
589
       "      <th>34</th>\n",
590
       "      <td>2</td>\n",
591
       "      <td>1</td>\n",
592
       "    </tr>\n",
593
       "    <tr>\n",
594
       "      <th>35</th>\n",
595
       "      <td>1</td>\n",
596
       "      <td>1</td>\n",
597
       "    </tr>\n",
598
       "    <tr>\n",
599
       "      <th>36</th>\n",
600
       "      <td>0</td>\n",
601
       "      <td>0</td>\n",
602
       "    </tr>\n",
603
       "    <tr>\n",
604
       "      <th>37</th>\n",
605
       "      <td>1</td>\n",
606
       "      <td>2</td>\n",
607
       "    </tr>\n",
608
       "  </tbody>\n",
609
       "</table>\n",
610
       "</div>"
611
      ],
612
      "text/plain": [
613
       "    0  1\n",
614
       "0   0  0\n",
615
       "1   0  0\n",
616
       "2   0  0\n",
617
       "3   1  2\n",
618
       "4   0  0\n",
619
       "5   1  1\n",
620
       "6   1  1\n",
621
       "7   0  0\n",
622
       "8   0  0\n",
623
       "9   0  0\n",
624
       "10  2  2\n",
625
       "11  1  1\n",
626
       "12  1  1\n",
627
       "13  0  0\n",
628
       "14  2  2\n",
629
       "15  1  1\n",
630
       "16  0  0\n",
631
       "17  1  1\n",
632
       "18  0  0\n",
633
       "19  0  0\n",
634
       "20  0  0\n",
635
       "21  2  2\n",
636
       "22  2  2\n",
637
       "23  1  1\n",
638
       "24  1  2\n",
639
       "25  0  0\n",
640
       "26  2  1\n",
641
       "27  1  1\n",
642
       "28  1  1\n",
643
       "29  2  2\n",
644
       "30  0  0\n",
645
       "31  1  1\n",
646
       "32  0  0\n",
647
       "33  1  1\n",
648
       "34  2  1\n",
649
       "35  1  1\n",
650
       "36  0  0\n",
651
       "37  1  2"
652
      ]
653
     },
654
     "execution_count": 7,
655
     "metadata": {},
656
     "output_type": "execute_result"
657
    }
658
   ],
659
   "source": [
660
    "lda = LinearDiscriminantAnalysis()\n",
661
    "lda.fit(X_train, y_train)\n",
662
    "y_pred = lda.predict(X_test)\n",
663
    "\n",
664
    "result = pd.DataFrame([y_test, y_pred]).T\n",
665
    "result"
666
   ]
667
  },
668
  {
669
   "cell_type": "code",
670
   "execution_count": 9,
671
   "id": "f0377999",
672
   "metadata": {},
673
   "outputs": [
674
    {
675
     "data": {
676
      "text/plain": [
677
       "0.868421052631579"
678
      ]
679
     },
680
     "execution_count": 9,
681
     "metadata": {},
682
     "output_type": "execute_result"
683
    }
684
   ],
685
   "source": [
686
    "accuracy_score(y_test, y_pred)"
687
   ]
688
  },
689
  {
690
   "cell_type": "code",
691
   "execution_count": 27,
692
   "id": "d5ae009f",
693
   "metadata": {},
694
   "outputs": [
695
    {
696
     "data": {
697
      "image/png": "\n",
698
      "text/plain": [
699
       "<Figure size 640x480 with 1 Axes>"
700
      ]
701
     },
702
     "metadata": {},
703
     "output_type": "display_data"
704
    }
705
   ],
706
   "source": [
707
    "plt.scatter(X_test['sepal length (cm)'], X_test['sepal width (cm)'], c=y_pred)\n",
708
    "plt.scatter(lda.means_[:, 0], lda.means_[:, 1], c='blue', marker='.')\n",
709
    "plt.xlabel('sepal length')\n",
710
    "plt.ylabel('sepal width')\n",
711
    "plt.show()"
712
   ]
713
  },
714
  {
715
   "cell_type": "code",
716
   "execution_count": 19,
717
   "id": "ddb40ab9",
718
   "metadata": {},
719
   "outputs": [
720
    {
721
     "name": "stderr",
722
     "output_type": "stream",
723
     "text": [
724
      "C:\\Users\\shali\\anaconda3\\lib\\site-packages\\sklearn\\cluster\\_kmeans.py:1036: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.\n",
725
      "  warnings.warn(\n"
726
     ]
727
    },
728
    {
729
     "data": {
730
      "image/png": "\n",
731
      "text/plain": [
732
       "<Figure size 640x480 with 1 Axes>"
733
      ]
734
     },
735
     "metadata": {},
736
     "output_type": "display_data"
737
    }
738
   ],
739
   "source": [
740
    "inertia = []\n",
741
    "ks = range(1, 10)\n",
742
    "\n",
743
    "for i in ks:\n",
744
    "    # Fixed seed so the elbow curve is identical on every Restart & Run All.\n",
    "    k_means = KMeans(n_clusters=i, random_state=80)\n",
745
    "    # Target dropped, as the task requires: cluster on the two sepal features only.\n",
    "    k_means.fit(data[['sepal length (cm)', 'sepal width (cm)']])\n",
746
    "    inertia.append(k_means.inertia_)\n",
747
    "    \n",
748
    "plt.plot(ks, inertia)\n",
749
    "plt.plot(ks, inertia ,'ro')\n",
750
    "plt.text(3, 45, 'оптимальное число кластеров')\n",
751
    "plt.show()"
752
   ]
753
  },
754
  {
755
   "cell_type": "code",
756
   "execution_count": 20,
757
   "id": "4caa5986",
758
   "metadata": {},
759
   "outputs": [
760
    {
761
     "data": {
762
      "image/png": "\n",
763
      "text/plain": [
764
       "<Figure size 640x480 with 1 Axes>"
765
      ]
766
     },
767
     "metadata": {},
768
     "output_type": "display_data"
769
    }
770
   ],
771
   "source": [
772
    "# Fixed seed so cluster labels (and plot colours) are reproducible between runs.\n",
    "k_means = KMeans(n_clusters=3, random_state=80)\n",
773
    "# Target dropped, as the task requires: cluster on the two sepal features only.\n",
    "clusters = k_means.fit_predict(data[['sepal length (cm)', 'sepal width (cm)']])\n",
774
    "plt.scatter(data['sepal length (cm)'], data['sepal width (cm)'], c=clusters)\n",
775
    "plt.show()"
776
   ]
777
  },
778
  {
779
   "cell_type": "markdown",
780
   "id": "559a3c3b",
781
   "metadata": {},
782
   "source": [
783
    "![](Scikit_learn_logo.png)"
784
   ]
785
  }
786
 ],
787
 "metadata": {
788
  "kernelspec": {
789
   "display_name": "Python 3 (ipykernel)",
790
   "language": "python",
791
   "name": "python3"
792
  },
793
  "language_info": {
794
   "codemirror_mode": {
795
    "name": "ipython",
796
    "version": 3
797
   },
798
   "file_extension": ".py",
799
   "mimetype": "text/x-python",
800
   "name": "python",
801
   "nbconvert_exporter": "python",
802
   "pygments_lexer": "ipython3",
803
   "version": "3.9.13"
804
  }
805
 },
806
 "nbformat": 4,
807
 "nbformat_minor": 5
808
}
809

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.