python_for_analytics

Форк
0
/
3.2.1_LEC_pandas_apply_examples.ipynb 
2214 строк · 85.6 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "metadata": {},
6
   "source": [
7
    "# Создание столбцов на ходу\n",
8
    "- Лена - разработчик\n",
9
    "- Ваня - дизайнер\n",
10
    "- Настя - редактор"
11
   ]
12
  },
13
  {
14
   "cell_type": "code",
15
   "execution_count": 1,
16
   "metadata": {},
17
   "outputs": [],
18
   "source": [
19
    "import pandas as pd"
20
   ]
21
  },
22
  {
23
   "cell_type": "code",
24
   "execution_count": 2,
25
   "metadata": {},
26
   "outputs": [],
27
   "source": [
28
    "def departments(row):\n",
29
    "    # row['разработка'] = 1  # создается новый столбец\n",
30
    "    info = row['dicts']\n",
31
    "    # print(info)\n",
32
    "    \n",
33
    "    if 'Лена' in info:\n",
34
    "        row['разработка'] = info['Лена']\n",
35
    "        \n",
36
    "    if 'Настя' in info:\n",
37
    "        row['редакторы'] = info['Настя']\n",
38
    "    \n",
39
    "    return row"
40
   ]
41
  },
42
  {
43
   "cell_type": "code",
44
   "execution_count": 3,
45
   "metadata": {},
46
   "outputs": [
47
    {
48
     "ename": "NameError",
49
     "evalue": "name 'df' is not defined",
50
     "output_type": "error",
51
     "traceback": [
52
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
53
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
54
      "Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mapply(departments, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m.\u001b[39mfillna(\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m      2\u001b[0m df\n",
55
      "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
56
     ]
57
    }
58
   ],
59
   "source": [
60
    "df = df.apply(departments, axis=1).fillna(0)\n",
61
    "df"
62
   ]
63
  },
64
  {
65
   "cell_type": "code",
66
   "execution_count": 4,
67
   "metadata": {},
68
   "outputs": [
69
    {
70
     "ename": "NameError",
71
     "evalue": "name 'df' is not defined",
72
     "output_type": "error",
73
     "traceback": [
74
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
75
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
76
      "Cell \u001b[1;32mIn[4], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mastype({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mразработка\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mint32\u001b[39m\u001b[38;5;124m'\u001b[39m})\n",
77
      "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
78
     ]
79
    }
80
   ],
81
   "source": [
82
    "df.astype({'разработка': 'int32'})"
83
   ]
84
  },
85
  {
86
   "cell_type": "code",
87
   "execution_count": 5,
88
   "metadata": {},
89
   "outputs": [
90
    {
91
     "data": {
92
      "text/html": [
93
       "<div>\n",
94
       "<style scoped>\n",
95
       "    .dataframe tbody tr th:only-of-type {\n",
96
       "        vertical-align: middle;\n",
97
       "    }\n",
98
       "\n",
99
       "    .dataframe tbody tr th {\n",
100
       "        vertical-align: top;\n",
101
       "    }\n",
102
       "\n",
103
       "    .dataframe thead th {\n",
104
       "        text-align: right;\n",
105
       "    }\n",
106
       "</style>\n",
107
       "<table border=\"1\" class=\"dataframe\">\n",
108
       "  <thead>\n",
109
       "    <tr style=\"text-align: right;\">\n",
110
       "      <th></th>\n",
111
       "      <th>dicts</th>\n",
112
       "    </tr>\n",
113
       "  </thead>\n",
114
       "  <tbody>\n",
115
       "    <tr>\n",
116
       "      <th>0</th>\n",
117
       "      <td>{'Лена': 1, 'Ваня': 2}</td>\n",
118
       "    </tr>\n",
119
       "    <tr>\n",
120
       "      <th>1</th>\n",
121
       "      <td>{'Ваня': 3, 'Леша': 5}</td>\n",
122
       "    </tr>\n",
123
       "    <tr>\n",
124
       "      <th>2</th>\n",
125
       "      <td>{'Настя': 3}</td>\n",
126
       "    </tr>\n",
127
       "  </tbody>\n",
128
       "</table>\n",
129
       "</div>"
130
      ],
131
      "text/plain": [
132
       "                    dicts\n",
133
       "0  {'Лена': 1, 'Ваня': 2}\n",
134
       "1  {'Ваня': 3, 'Леша': 5}\n",
135
       "2            {'Настя': 3}"
136
      ]
137
     },
138
     "execution_count": 5,
139
     "metadata": {},
140
     "output_type": "execute_result"
141
    }
142
   ],
143
   "source": [
144
    "df = pd.DataFrame({'dicts': [{'Лена': 1, 'Ваня': 2}, {'Ваня': 3, 'Леша': 5}, {'Настя': 3}]})\n",
145
    "df"
146
   ]
147
  },
148
  {
149
   "cell_type": "code",
150
   "execution_count": null,
151
   "metadata": {},
152
   "outputs": [],
153
   "source": []
154
  },
155
  {
156
   "cell_type": "code",
157
   "execution_count": null,
158
   "metadata": {},
159
   "outputs": [],
160
   "source": []
161
  },
162
  {
163
   "cell_type": "code",
164
   "execution_count": null,
165
   "metadata": {},
166
   "outputs": [],
167
   "source": []
168
  },
169
  {
170
   "cell_type": "code",
171
   "execution_count": null,
172
   "metadata": {},
173
   "outputs": [],
174
   "source": []
175
  },
176
  {
177
   "cell_type": "markdown",
178
   "metadata": {},
179
   "source": [
180
    "# Группировки и оконные функции"
181
   ]
182
  },
183
  {
184
   "cell_type": "code",
185
   "execution_count": 17,
186
   "metadata": {
187
    "scrolled": true
188
   },
189
   "outputs": [
190
    {
191
     "data": {
192
      "text/html": [
193
       "<div>\n",
194
       "<style scoped>\n",
195
       "    .dataframe tbody tr th:only-of-type {\n",
196
       "        vertical-align: middle;\n",
197
       "    }\n",
198
       "\n",
199
       "    .dataframe tbody tr th {\n",
200
       "        vertical-align: top;\n",
201
       "    }\n",
202
       "\n",
203
       "    .dataframe thead th {\n",
204
       "        text-align: right;\n",
205
       "    }\n",
206
       "</style>\n",
207
       "<table border=\"1\" class=\"dataframe\">\n",
208
       "  <thead>\n",
209
       "    <tr style=\"text-align: right;\">\n",
210
       "      <th></th>\n",
211
       "      <th>region</th>\n",
212
       "      <th>city</th>\n",
213
       "      <th>sales</th>\n",
214
       "    </tr>\n",
215
       "  </thead>\n",
216
       "  <tbody>\n",
217
       "    <tr>\n",
218
       "      <th>0</th>\n",
219
       "      <td>СФО</td>\n",
220
       "      <td>Новосибирск</td>\n",
221
       "      <td>140</td>\n",
222
       "    </tr>\n",
223
       "    <tr>\n",
224
       "      <th>1</th>\n",
225
       "      <td>СФО</td>\n",
226
       "      <td>Томск</td>\n",
227
       "      <td>135</td>\n",
228
       "    </tr>\n",
229
       "    <tr>\n",
230
       "      <th>2</th>\n",
231
       "      <td>ДВФО</td>\n",
232
       "      <td>Владивосток</td>\n",
233
       "      <td>290</td>\n",
234
       "    </tr>\n",
235
       "    <tr>\n",
236
       "      <th>3</th>\n",
237
       "      <td>СФО</td>\n",
238
       "      <td>Новосибирск</td>\n",
239
       "      <td>95</td>\n",
240
       "    </tr>\n",
241
       "    <tr>\n",
242
       "      <th>4</th>\n",
243
       "      <td>ДВФО</td>\n",
244
       "      <td>Владивосток</td>\n",
245
       "      <td>100</td>\n",
246
       "    </tr>\n",
247
       "    <tr>\n",
248
       "      <th>5</th>\n",
249
       "      <td>ДВФО</td>\n",
250
       "      <td>Владивосток</td>\n",
251
       "      <td>50</td>\n",
252
       "    </tr>\n",
253
       "    <tr>\n",
254
       "      <th>6</th>\n",
255
       "      <td>ДВФО</td>\n",
256
       "      <td>Владивосток</td>\n",
257
       "      <td>40</td>\n",
258
       "    </tr>\n",
259
       "    <tr>\n",
260
       "      <th>7</th>\n",
261
       "      <td>ДВФО</td>\n",
262
       "      <td>Владивосток</td>\n",
263
       "      <td>120</td>\n",
264
       "    </tr>\n",
265
       "    <tr>\n",
266
       "      <th>8</th>\n",
267
       "      <td>СФО</td>\n",
268
       "      <td>Томск</td>\n",
269
       "      <td>190</td>\n",
270
       "    </tr>\n",
271
       "    <tr>\n",
272
       "      <th>9</th>\n",
273
       "      <td>ДВФО</td>\n",
274
       "      <td>Владивосток</td>\n",
275
       "      <td>200</td>\n",
276
       "    </tr>\n",
277
       "  </tbody>\n",
278
       "</table>\n",
279
       "</div>"
280
      ],
281
      "text/plain": [
282
       "  region         city  sales\n",
283
       "0    СФО  Новосибирск    140\n",
284
       "1    СФО        Томск    135\n",
285
       "2   ДВФО  Владивосток    290\n",
286
       "3    СФО  Новосибирск     95\n",
287
       "4   ДВФО  Владивосток    100\n",
288
       "5   ДВФО  Владивосток     50\n",
289
       "6   ДВФО  Владивосток     40\n",
290
       "7   ДВФО  Владивосток    120\n",
291
       "8    СФО        Томск    190\n",
292
       "9   ДВФО  Владивосток    200"
293
      ]
294
     },
295
     "execution_count": 17,
296
     "metadata": {},
297
     "output_type": "execute_result"
298
    }
299
   ],
300
   "source": [
301
    "df = pd.DataFrame(\n",
302
    "    {\n",
303
    "        'region': ['СФО', 'СФО', 'ДВФО', 'СФО', 'ДВФО', 'ДВФО', 'ДВФО', 'ДВФО', 'СФО', 'ДВФО'],\n",
304
    "        'city': ['Новосибирск', 'Томск', 'Владивосток', 'Новосибирск', 'Владивосток', 'Владивосток', 'Владивосток', 'Владивосток', 'Томск', 'Владивосток'],\n",
305
    "        'sales': [140, 135, 290, 95, 100, 50, 40, 120, 190, 200],\n",
306
    "    }\n",
307
    ")\n",
308
    "df"
309
   ]
310
  },
311
  {
312
   "cell_type": "code",
313
   "execution_count": 19,
314
   "metadata": {},
315
   "outputs": [
316
    {
317
     "data": {
318
      "text/plain": [
319
       "array(['СФО', 'ДВФО'], dtype=object)"
320
      ]
321
     },
322
     "execution_count": 19,
323
     "metadata": {},
324
     "output_type": "execute_result"
325
    }
326
   ],
327
   "source": [
328
    "df['region'].unique()"
329
   ]
330
  },
331
  {
332
   "cell_type": "code",
333
   "execution_count": 20,
334
   "metadata": {},
335
   "outputs": [
336
    {
337
     "data": {
338
      "text/html": [
339
       "<div>\n",
340
       "<style scoped>\n",
341
       "    .dataframe tbody tr th:only-of-type {\n",
342
       "        vertical-align: middle;\n",
343
       "    }\n",
344
       "\n",
345
       "    .dataframe tbody tr th {\n",
346
       "        vertical-align: top;\n",
347
       "    }\n",
348
       "\n",
349
       "    .dataframe thead th {\n",
350
       "        text-align: right;\n",
351
       "    }\n",
352
       "</style>\n",
353
       "<table border=\"1\" class=\"dataframe\">\n",
354
       "  <thead>\n",
355
       "    <tr style=\"text-align: right;\">\n",
356
       "      <th></th>\n",
357
       "      <th>region</th>\n",
358
       "      <th>city</th>\n",
359
       "      <th>sales</th>\n",
360
       "    </tr>\n",
361
       "  </thead>\n",
362
       "  <tbody>\n",
363
       "    <tr>\n",
364
       "      <th>0</th>\n",
365
       "      <td>СФО</td>\n",
366
       "      <td>Новосибирск</td>\n",
367
       "      <td>140</td>\n",
368
       "    </tr>\n",
369
       "    <tr>\n",
370
       "      <th>1</th>\n",
371
       "      <td>СФО</td>\n",
372
       "      <td>Томск</td>\n",
373
       "      <td>135</td>\n",
374
       "    </tr>\n",
375
       "    <tr>\n",
376
       "      <th>3</th>\n",
377
       "      <td>СФО</td>\n",
378
       "      <td>Новосибирск</td>\n",
379
       "      <td>95</td>\n",
380
       "    </tr>\n",
381
       "    <tr>\n",
382
       "      <th>8</th>\n",
383
       "      <td>СФО</td>\n",
384
       "      <td>Томск</td>\n",
385
       "      <td>190</td>\n",
386
       "    </tr>\n",
387
       "  </tbody>\n",
388
       "</table>\n",
389
       "</div>"
390
      ],
391
      "text/plain": [
392
       "  region         city  sales\n",
393
       "0    СФО  Новосибирск    140\n",
394
       "1    СФО        Томск    135\n",
395
       "3    СФО  Новосибирск     95\n",
396
       "8    СФО        Томск    190"
397
      ]
398
     },
399
     "execution_count": 20,
400
     "metadata": {},
401
     "output_type": "execute_result"
402
    }
403
   ],
404
   "source": [
405
    "df_sfo = df[df['region'] == 'СФО']\n",
406
    "df_sfo"
407
   ]
408
  },
409
  {
410
   "cell_type": "code",
411
   "execution_count": 22,
412
   "metadata": {},
413
   "outputs": [
414
    {
415
     "data": {
416
      "text/plain": [
417
       "region    4\n",
418
       "city      4\n",
419
       "sales     4\n",
420
       "dtype: int64"
421
      ]
422
     },
423
     "execution_count": 22,
424
     "metadata": {},
425
     "output_type": "execute_result"
426
    }
427
   ],
428
   "source": [
429
    "df_sfo.count()  # количество непустых (не NaN) значений в каждом столбце"
430
   ]
431
  },
432
  {
433
   "cell_type": "code",
434
   "execution_count": 18,
435
   "metadata": {},
436
   "outputs": [
437
    {
438
     "data": {
439
      "text/html": [
440
       "<div>\n",
441
       "<style scoped>\n",
442
       "    .dataframe tbody tr th:only-of-type {\n",
443
       "        vertical-align: middle;\n",
444
       "    }\n",
445
       "\n",
446
       "    .dataframe tbody tr th {\n",
447
       "        vertical-align: top;\n",
448
       "    }\n",
449
       "\n",
450
       "    .dataframe thead th {\n",
451
       "        text-align: right;\n",
452
       "    }\n",
453
       "</style>\n",
454
       "<table border=\"1\" class=\"dataframe\">\n",
455
       "  <thead>\n",
456
       "    <tr style=\"text-align: right;\">\n",
457
       "      <th></th>\n",
458
       "      <th>city</th>\n",
459
       "      <th>sales</th>\n",
460
       "    </tr>\n",
461
       "    <tr>\n",
462
       "      <th>region</th>\n",
463
       "      <th></th>\n",
464
       "      <th></th>\n",
465
       "    </tr>\n",
466
       "  </thead>\n",
467
       "  <tbody>\n",
468
       "    <tr>\n",
469
       "      <th>ДВФО</th>\n",
470
       "      <td>6</td>\n",
471
       "      <td>6</td>\n",
472
       "    </tr>\n",
473
       "    <tr>\n",
474
       "      <th>СФО</th>\n",
475
       "      <td>4</td>\n",
476
       "      <td>4</td>\n",
477
       "    </tr>\n",
478
       "  </tbody>\n",
479
       "</table>\n",
480
       "</div>"
481
      ],
482
      "text/plain": [
483
       "        city  sales\n",
484
       "region             \n",
485
       "ДВФО       6      6\n",
486
       "СФО        4      4"
487
      ]
488
     },
489
     "execution_count": 18,
490
     "metadata": {},
491
     "output_type": "execute_result"
492
    }
493
   ],
494
   "source": [
495
    "df.groupby('region').count()"
496
   ]
497
  },
498
  {
499
   "cell_type": "code",
500
   "execution_count": null,
501
   "metadata": {},
502
   "outputs": [],
503
   "source": []
504
  },
505
  {
506
   "cell_type": "markdown",
507
   "metadata": {},
508
   "source": [
509
    "Посчитать размах значений"
510
   ]
511
  },
512
  {
513
   "cell_type": "code",
514
   "execution_count": 23,
515
   "metadata": {},
516
   "outputs": [
517
    {
518
     "data": {
519
      "text/html": [
520
       "<div>\n",
521
       "<style scoped>\n",
522
       "    .dataframe tbody tr th:only-of-type {\n",
523
       "        vertical-align: middle;\n",
524
       "    }\n",
525
       "\n",
526
       "    .dataframe tbody tr th {\n",
527
       "        vertical-align: top;\n",
528
       "    }\n",
529
       "\n",
530
       "    .dataframe thead th {\n",
531
       "        text-align: right;\n",
532
       "    }\n",
533
       "</style>\n",
534
       "<table border=\"1\" class=\"dataframe\">\n",
535
       "  <thead>\n",
536
       "    <tr style=\"text-align: right;\">\n",
537
       "      <th></th>\n",
538
       "      <th>region</th>\n",
539
       "      <th>city</th>\n",
540
       "      <th>sales</th>\n",
541
       "    </tr>\n",
542
       "  </thead>\n",
543
       "  <tbody>\n",
544
       "    <tr>\n",
545
       "      <th>0</th>\n",
546
       "      <td>СФО</td>\n",
547
       "      <td>Новосибирск</td>\n",
548
       "      <td>140</td>\n",
549
       "    </tr>\n",
550
       "    <tr>\n",
551
       "      <th>1</th>\n",
552
       "      <td>СФО</td>\n",
553
       "      <td>Томск</td>\n",
554
       "      <td>135</td>\n",
555
       "    </tr>\n",
556
       "    <tr>\n",
557
       "      <th>2</th>\n",
558
       "      <td>ДВФО</td>\n",
559
       "      <td>Владивосток</td>\n",
560
       "      <td>290</td>\n",
561
       "    </tr>\n",
562
       "    <tr>\n",
563
       "      <th>3</th>\n",
564
       "      <td>СФО</td>\n",
565
       "      <td>Новосибирск</td>\n",
566
       "      <td>95</td>\n",
567
       "    </tr>\n",
568
       "    <tr>\n",
569
       "      <th>4</th>\n",
570
       "      <td>ДВФО</td>\n",
571
       "      <td>Владивосток</td>\n",
572
       "      <td>100</td>\n",
573
       "    </tr>\n",
574
       "    <tr>\n",
575
       "      <th>5</th>\n",
576
       "      <td>ДВФО</td>\n",
577
       "      <td>Владивосток</td>\n",
578
       "      <td>50</td>\n",
579
       "    </tr>\n",
580
       "    <tr>\n",
581
       "      <th>6</th>\n",
582
       "      <td>ДВФО</td>\n",
583
       "      <td>Владивосток</td>\n",
584
       "      <td>40</td>\n",
585
       "    </tr>\n",
586
       "    <tr>\n",
587
       "      <th>7</th>\n",
588
       "      <td>ДВФО</td>\n",
589
       "      <td>Владивосток</td>\n",
590
       "      <td>120</td>\n",
591
       "    </tr>\n",
592
       "    <tr>\n",
593
       "      <th>8</th>\n",
594
       "      <td>СФО</td>\n",
595
       "      <td>Томск</td>\n",
596
       "      <td>190</td>\n",
597
       "    </tr>\n",
598
       "    <tr>\n",
599
       "      <th>9</th>\n",
600
       "      <td>ДВФО</td>\n",
601
       "      <td>Владивосток</td>\n",
602
       "      <td>200</td>\n",
603
       "    </tr>\n",
604
       "  </tbody>\n",
605
       "</table>\n",
606
       "</div>"
607
      ],
608
      "text/plain": [
609
       "  region         city  sales\n",
610
       "0    СФО  Новосибирск    140\n",
611
       "1    СФО        Томск    135\n",
612
       "2   ДВФО  Владивосток    290\n",
613
       "3    СФО  Новосибирск     95\n",
614
       "4   ДВФО  Владивосток    100\n",
615
       "5   ДВФО  Владивосток     50\n",
616
       "6   ДВФО  Владивосток     40\n",
617
       "7   ДВФО  Владивосток    120\n",
618
       "8    СФО        Томск    190\n",
619
       "9   ДВФО  Владивосток    200"
620
      ]
621
     },
622
     "execution_count": 23,
623
     "metadata": {},
624
     "output_type": "execute_result"
625
    }
626
   ],
627
   "source": [
628
    "df"
629
   ]
630
  },
631
  {
632
   "cell_type": "code",
633
   "execution_count": 24,
634
   "metadata": {},
635
   "outputs": [
636
    {
637
     "data": {
638
      "text/html": [
639
       "<div>\n",
640
       "<style scoped>\n",
641
       "    .dataframe tbody tr th:only-of-type {\n",
642
       "        vertical-align: middle;\n",
643
       "    }\n",
644
       "\n",
645
       "    .dataframe tbody tr th {\n",
646
       "        vertical-align: top;\n",
647
       "    }\n",
648
       "\n",
649
       "    .dataframe thead th {\n",
650
       "        text-align: right;\n",
651
       "    }\n",
652
       "</style>\n",
653
       "<table border=\"1\" class=\"dataframe\">\n",
654
       "  <thead>\n",
655
       "    <tr style=\"text-align: right;\">\n",
656
       "      <th></th>\n",
657
       "      <th>region</th>\n",
658
       "      <th>city</th>\n",
659
       "      <th>sales</th>\n",
660
       "    </tr>\n",
661
       "  </thead>\n",
662
       "  <tbody>\n",
663
       "    <tr>\n",
664
       "      <th>0</th>\n",
665
       "      <td>СФО</td>\n",
666
       "      <td>Новосибирск</td>\n",
667
       "      <td>140</td>\n",
668
       "    </tr>\n",
669
       "    <tr>\n",
670
       "      <th>1</th>\n",
671
       "      <td>СФО</td>\n",
672
       "      <td>Томск</td>\n",
673
       "      <td>135</td>\n",
674
       "    </tr>\n",
675
       "    <tr>\n",
676
       "      <th>3</th>\n",
677
       "      <td>СФО</td>\n",
678
       "      <td>Новосибирск</td>\n",
679
       "      <td>95</td>\n",
680
       "    </tr>\n",
681
       "    <tr>\n",
682
       "      <th>8</th>\n",
683
       "      <td>СФО</td>\n",
684
       "      <td>Томск</td>\n",
685
       "      <td>190</td>\n",
686
       "    </tr>\n",
687
       "  </tbody>\n",
688
       "</table>\n",
689
       "</div>"
690
      ],
691
      "text/plain": [
692
       "  region         city  sales\n",
693
       "0    СФО  Новосибирск    140\n",
694
       "1    СФО        Томск    135\n",
695
       "3    СФО  Новосибирск     95\n",
696
       "8    СФО        Томск    190"
697
      ]
698
     },
699
     "execution_count": 24,
700
     "metadata": {},
701
     "output_type": "execute_result"
702
    }
703
   ],
704
   "source": [
705
    "df_sfo"
706
   ]
707
  },
708
  {
709
   "cell_type": "code",
710
   "execution_count": 25,
711
   "metadata": {},
712
   "outputs": [
713
    {
714
     "data": {
715
      "text/plain": [
716
       "95"
717
      ]
718
     },
719
     "execution_count": 25,
720
     "metadata": {},
721
     "output_type": "execute_result"
722
    }
723
   ],
724
   "source": [
725
    "min_sales = df_sfo['sales'].min()\n",
726
    "min_sales"
727
   ]
728
  },
729
  {
730
   "cell_type": "code",
731
   "execution_count": 26,
732
   "metadata": {},
733
   "outputs": [
734
    {
735
     "data": {
736
      "text/plain": [
737
       "190"
738
      ]
739
     },
740
     "execution_count": 26,
741
     "metadata": {},
742
     "output_type": "execute_result"
743
    }
744
   ],
745
   "source": [
746
    "max_sales = df_sfo['sales'].max()\n",
747
    "max_sales"
748
   ]
749
  },
750
  {
751
   "cell_type": "code",
752
   "execution_count": 27,
753
   "metadata": {},
754
   "outputs": [
755
    {
756
     "data": {
757
      "text/plain": [
758
       "95"
759
      ]
760
     },
761
     "execution_count": 27,
762
     "metadata": {},
763
     "output_type": "execute_result"
764
    }
765
   ],
766
   "source": [
767
    "max_sales - min_sales"
768
   ]
769
  },
770
  {
771
   "cell_type": "code",
772
   "execution_count": 28,
773
   "metadata": {},
774
   "outputs": [],
775
   "source": [
776
    "def min_max(df):\n",
777
    "    min_sales = df['sales'].min()\n",
778
    "    max_sales = df['sales'].max()\n",
779
    "    \n",
780
    "    return max_sales - min_sales"
781
   ]
782
  },
783
  {
784
   "cell_type": "code",
785
   "execution_count": 30,
786
   "metadata": {},
787
   "outputs": [
788
    {
789
     "data": {
790
      "text/plain": [
791
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fc77860ac40>"
792
      ]
793
     },
794
     "execution_count": 30,
795
     "metadata": {},
796
     "output_type": "execute_result"
797
    }
798
   ],
799
   "source": [
800
    "df.groupby('region')"
801
   ]
802
  },
803
  {
804
   "cell_type": "code",
805
   "execution_count": 29,
806
   "metadata": {},
807
   "outputs": [
808
    {
809
     "data": {
810
      "text/plain": [
811
       "region\n",
812
       "ДВФО    250\n",
813
       "СФО      95\n",
814
       "dtype: int64"
815
      ]
816
     },
817
     "execution_count": 29,
818
     "metadata": {},
819
     "output_type": "execute_result"
820
    }
821
   ],
822
   "source": [
823
    "df.groupby('region').apply(min_max)  # на вход функции подается датафрейм вроде df_sfo"
824
   ]
825
  },
826
  {
827
   "cell_type": "code",
828
   "execution_count": 37,
829
   "metadata": {},
830
   "outputs": [
831
    {
832
     "data": {
833
      "text/plain": [
834
       "region  city       \n",
835
       "ДВФО    Владивосток    250.0\n",
836
       "СФО     Новосибирск      0.0\n",
837
       "        Томск          190.0\n",
838
       "dtype: float64"
839
      ]
840
     },
841
     "execution_count": 37,
842
     "metadata": {},
843
     "output_type": "execute_result"
844
    }
845
   ],
846
   "source": [
847
    "df.groupby(['region', 'city']).apply(min_max)"
848
   ]
849
  },
850
  {
851
   "cell_type": "markdown",
852
   "metadata": {},
853
   "source": [
854
    "# Группировки с пустыми значениями"
855
   ]
856
  },
857
  {
858
   "cell_type": "code",
859
   "execution_count": 34,
860
   "metadata": {},
861
   "outputs": [
862
    {
863
     "data": {
864
      "text/html": [
865
       "<div>\n",
866
       "<style scoped>\n",
867
       "    .dataframe tbody tr th:only-of-type {\n",
868
       "        vertical-align: middle;\n",
869
       "    }\n",
870
       "\n",
871
       "    .dataframe tbody tr th {\n",
872
       "        vertical-align: top;\n",
873
       "    }\n",
874
       "\n",
875
       "    .dataframe thead th {\n",
876
       "        text-align: right;\n",
877
       "    }\n",
878
       "</style>\n",
879
       "<table border=\"1\" class=\"dataframe\">\n",
880
       "  <thead>\n",
881
       "    <tr style=\"text-align: right;\">\n",
882
       "      <th></th>\n",
883
       "      <th>region</th>\n",
884
       "      <th>city</th>\n",
885
       "      <th>sales</th>\n",
886
       "    </tr>\n",
887
       "  </thead>\n",
888
       "  <tbody>\n",
889
       "    <tr>\n",
890
       "      <th>0</th>\n",
891
       "      <td>СФО</td>\n",
892
       "      <td>Новосибирск</td>\n",
893
       "      <td>NaN</td>\n",
894
       "    </tr>\n",
895
       "    <tr>\n",
896
       "      <th>1</th>\n",
897
       "      <td>СФО</td>\n",
898
       "      <td>Томск</td>\n",
899
       "      <td>0.0</td>\n",
900
       "    </tr>\n",
901
       "    <tr>\n",
902
       "      <th>2</th>\n",
903
       "      <td>ДВФО</td>\n",
904
       "      <td>Владивосток</td>\n",
905
       "      <td>290.0</td>\n",
906
       "    </tr>\n",
907
       "    <tr>\n",
908
       "      <th>3</th>\n",
909
       "      <td>СФО</td>\n",
910
       "      <td>Новосибирск</td>\n",
911
       "      <td>95.0</td>\n",
912
       "    </tr>\n",
913
       "    <tr>\n",
914
       "      <th>4</th>\n",
915
       "      <td>ДВФО</td>\n",
916
       "      <td>Владивосток</td>\n",
917
       "      <td>100.0</td>\n",
918
       "    </tr>\n",
919
       "    <tr>\n",
920
       "      <th>5</th>\n",
921
       "      <td>ДВФО</td>\n",
922
       "      <td>Владивосток</td>\n",
923
       "      <td>50.0</td>\n",
924
       "    </tr>\n",
925
       "    <tr>\n",
926
       "      <th>6</th>\n",
927
       "      <td>ДВФО</td>\n",
928
       "      <td>Владивосток</td>\n",
929
       "      <td>40.0</td>\n",
930
       "    </tr>\n",
931
       "    <tr>\n",
932
       "      <th>7</th>\n",
933
       "      <td>ДВФО</td>\n",
934
       "      <td>Владивосток</td>\n",
935
       "      <td>120.0</td>\n",
936
       "    </tr>\n",
937
       "    <tr>\n",
938
       "      <th>8</th>\n",
939
       "      <td>СФО</td>\n",
940
       "      <td>Томск</td>\n",
941
       "      <td>190.0</td>\n",
942
       "    </tr>\n",
943
       "    <tr>\n",
944
       "      <th>9</th>\n",
945
       "      <td>None</td>\n",
946
       "      <td>Владивосток</td>\n",
947
       "      <td>200.0</td>\n",
948
       "    </tr>\n",
949
       "  </tbody>\n",
950
       "</table>\n",
951
       "</div>"
952
      ],
953
      "text/plain": [
954
       "  region         city  sales\n",
955
       "0    СФО  Новосибирск    NaN\n",
956
       "1    СФО        Томск    0.0\n",
957
       "2   ДВФО  Владивосток  290.0\n",
958
       "3    СФО  Новосибирск   95.0\n",
959
       "4   ДВФО  Владивосток  100.0\n",
960
       "5   ДВФО  Владивосток   50.0\n",
961
       "6   ДВФО  Владивосток   40.0\n",
962
       "7   ДВФО  Владивосток  120.0\n",
963
       "8    СФО        Томск  190.0\n",
964
       "9   None  Владивосток  200.0"
965
      ]
966
     },
967
     "execution_count": 34,
968
     "metadata": {},
969
     "output_type": "execute_result"
970
    }
971
   ],
972
   "source": [
973
    "df = pd.DataFrame(\n",
974
    "    {\n",
975
    "        'region': ['СФО', 'СФО', 'ДВФО', 'СФО', 'ДВФО', 'ДВФО', 'ДВФО', 'ДВФО', 'СФО', None],\n",
976
    "        'city': ['Новосибирск', 'Томск', 'Владивосток', 'Новосибирск', 'Владивосток', 'Владивосток', 'Владивосток', 'Владивосток', 'Томск', 'Владивосток'],\n",
977
    "        'sales': [None, 0, 290, 95, 100, 50, 40, 120, 190, 200],\n",
978
    "    }\n",
979
    ")\n",
980
    "df"
981
   ]
982
  },
983
  {
984
   "cell_type": "code",
985
   "execution_count": 35,
986
   "metadata": {},
987
   "outputs": [
988
    {
989
     "data": {
990
      "text/plain": [
991
       "ДВФО    5\n",
992
       "СФО     4\n",
993
       "Name: region, dtype: int64"
994
      ]
995
     },
996
     "execution_count": 35,
997
     "metadata": {},
998
     "output_type": "execute_result"
999
    }
1000
   ],
1001
   "source": [
1002
    "df['region'].value_counts()  # value_counts, как и groupby, по умолчанию НЕ учитывает строки с пустым ключом (чтобы учесть их, нужен dropna=False)"
1003
   ]
1004
  },
1005
  {
1006
   "cell_type": "code",
1007
   "execution_count": 47,
1008
   "metadata": {},
1009
   "outputs": [
1010
    {
1011
     "data": {
1012
      "text/plain": [
1013
       "Index(['ДВФО', 'СФО'], dtype='object')"
1014
      ]
1015
     },
1016
     "execution_count": 47,
1017
     "metadata": {},
1018
     "output_type": "execute_result"
1019
    }
1020
   ],
1021
   "source": [
1022
    "df['region'].value_counts().index"
1023
   ]
1024
  },
1025
  {
1026
   "cell_type": "code",
1027
   "execution_count": 48,
1028
   "metadata": {},
1029
   "outputs": [
1030
    {
1031
     "data": {
1032
      "text/plain": [
1033
       "array([5, 4])"
1034
      ]
1035
     },
1036
     "execution_count": 48,
1037
     "metadata": {},
1038
     "output_type": "execute_result"
1039
    }
1040
   ],
1041
   "source": [
1042
    "df['region'].value_counts().values"
1043
   ]
1044
  },
1045
  {
1046
   "cell_type": "code",
1047
   "execution_count": 36,
1048
   "metadata": {},
1049
   "outputs": [
1050
    {
1051
     "data": {
1052
      "text/html": [
1053
       "<div>\n",
1054
       "<style scoped>\n",
1055
       "    .dataframe tbody tr th:only-of-type {\n",
1056
       "        vertical-align: middle;\n",
1057
       "    }\n",
1058
       "\n",
1059
       "    .dataframe tbody tr th {\n",
1060
       "        vertical-align: top;\n",
1061
       "    }\n",
1062
       "\n",
1063
       "    .dataframe thead th {\n",
1064
       "        text-align: right;\n",
1065
       "    }\n",
1066
       "</style>\n",
1067
       "<table border=\"1\" class=\"dataframe\">\n",
1068
       "  <thead>\n",
1069
       "    <tr style=\"text-align: right;\">\n",
1070
       "      <th></th>\n",
1071
       "      <th>city</th>\n",
1072
       "      <th>sales</th>\n",
1073
       "    </tr>\n",
1074
       "    <tr>\n",
1075
       "      <th>region</th>\n",
1076
       "      <th></th>\n",
1077
       "      <th></th>\n",
1078
       "    </tr>\n",
1079
       "  </thead>\n",
1080
       "  <tbody>\n",
1081
       "    <tr>\n",
1082
       "      <th>ДВФО</th>\n",
1083
       "      <td>5</td>\n",
1084
       "      <td>5</td>\n",
1085
       "    </tr>\n",
1086
       "    <tr>\n",
1087
       "      <th>СФО</th>\n",
1088
       "      <td>4</td>\n",
1089
       "      <td>3</td>\n",
1090
       "    </tr>\n",
1091
       "  </tbody>\n",
1092
       "</table>\n",
1093
       "</div>"
1094
      ],
1095
      "text/plain": [
1096
       "        city  sales\n",
1097
       "region             \n",
1098
       "ДВФО       5      5\n",
1099
       "СФО        4      3"
1100
      ]
1101
     },
1102
     "execution_count": 36,
1103
     "metadata": {},
1104
     "output_type": "execute_result"
1105
    }
1106
   ],
1107
   "source": [
1108
    "df.groupby('region').count()  # метод count возвращает количество НЕПУСТЫХ (не NaN) значений в каждом столбце"
1109
   ]
1110
  },
1111
  {
1112
   "cell_type": "markdown",
1113
   "metadata": {},
1114
   "source": [
1115
    "### Метод agg"
1116
   ]
1117
  },
1118
  {
1119
   "cell_type": "code",
1120
   "execution_count": 38,
1121
   "metadata": {},
1122
   "outputs": [
1123
    {
1124
     "data": {
1125
      "text/html": [
1126
       "<div>\n",
1127
       "<style scoped>\n",
1128
       "    .dataframe tbody tr th:only-of-type {\n",
1129
       "        vertical-align: middle;\n",
1130
       "    }\n",
1131
       "\n",
1132
       "    .dataframe tbody tr th {\n",
1133
       "        vertical-align: top;\n",
1134
       "    }\n",
1135
       "\n",
1136
       "    .dataframe thead th {\n",
1137
       "        text-align: right;\n",
1138
       "    }\n",
1139
       "</style>\n",
1140
       "<table border=\"1\" class=\"dataframe\">\n",
1141
       "  <thead>\n",
1142
       "    <tr style=\"text-align: right;\">\n",
1143
       "      <th></th>\n",
1144
       "      <th>region</th>\n",
1145
       "      <th>city</th>\n",
1146
       "      <th>sales</th>\n",
1147
       "    </tr>\n",
1148
       "  </thead>\n",
1149
       "  <tbody>\n",
1150
       "    <tr>\n",
1151
       "      <th>0</th>\n",
1152
       "      <td>СФО</td>\n",
1153
       "      <td>Новосибирск</td>\n",
1154
       "      <td>NaN</td>\n",
1155
       "    </tr>\n",
1156
       "    <tr>\n",
1157
       "      <th>1</th>\n",
1158
       "      <td>СФО</td>\n",
1159
       "      <td>Томск</td>\n",
1160
       "      <td>0.0</td>\n",
1161
       "    </tr>\n",
1162
       "    <tr>\n",
1163
       "      <th>2</th>\n",
1164
       "      <td>ДВФО</td>\n",
1165
       "      <td>Владивосток</td>\n",
1166
       "      <td>290.0</td>\n",
1167
       "    </tr>\n",
1168
       "    <tr>\n",
1169
       "      <th>3</th>\n",
1170
       "      <td>СФО</td>\n",
1171
       "      <td>Новосибирск</td>\n",
1172
       "      <td>95.0</td>\n",
1173
       "    </tr>\n",
1174
       "    <tr>\n",
1175
       "      <th>4</th>\n",
1176
       "      <td>ДВФО</td>\n",
1177
       "      <td>Владивосток</td>\n",
1178
       "      <td>100.0</td>\n",
1179
       "    </tr>\n",
1180
       "    <tr>\n",
1181
       "      <th>5</th>\n",
1182
       "      <td>ДВФО</td>\n",
1183
       "      <td>Владивосток</td>\n",
1184
       "      <td>50.0</td>\n",
1185
       "    </tr>\n",
1186
       "    <tr>\n",
1187
       "      <th>6</th>\n",
1188
       "      <td>ДВФО</td>\n",
1189
       "      <td>Владивосток</td>\n",
1190
       "      <td>40.0</td>\n",
1191
       "    </tr>\n",
1192
       "    <tr>\n",
1193
       "      <th>7</th>\n",
1194
       "      <td>ДВФО</td>\n",
1195
       "      <td>Владивосток</td>\n",
1196
       "      <td>120.0</td>\n",
1197
       "    </tr>\n",
1198
       "    <tr>\n",
1199
       "      <th>8</th>\n",
1200
       "      <td>СФО</td>\n",
1201
       "      <td>Томск</td>\n",
1202
       "      <td>190.0</td>\n",
1203
       "    </tr>\n",
1204
       "    <tr>\n",
1205
       "      <th>9</th>\n",
1206
       "      <td>None</td>\n",
1207
       "      <td>Владивосток</td>\n",
1208
       "      <td>200.0</td>\n",
1209
       "    </tr>\n",
1210
       "  </tbody>\n",
1211
       "</table>\n",
1212
       "</div>"
1213
      ],
1214
      "text/plain": [
1215
       "  region         city  sales\n",
1216
       "0    СФО  Новосибирск    NaN\n",
1217
       "1    СФО        Томск    0.0\n",
1218
       "2   ДВФО  Владивосток  290.0\n",
1219
       "3    СФО  Новосибирск   95.0\n",
1220
       "4   ДВФО  Владивосток  100.0\n",
1221
       "5   ДВФО  Владивосток   50.0\n",
1222
       "6   ДВФО  Владивосток   40.0\n",
1223
       "7   ДВФО  Владивосток  120.0\n",
1224
       "8    СФО        Томск  190.0\n",
1225
       "9   None  Владивосток  200.0"
1226
      ]
1227
     },
1228
     "execution_count": 38,
1229
     "metadata": {},
1230
     "output_type": "execute_result"
1231
    }
1232
   ],
1233
   "source": [
1234
    "df"
1235
   ]
1236
  },
1237
  {
1238
   "cell_type": "code",
1239
   "execution_count": 41,
1240
   "metadata": {},
1241
   "outputs": [
1242
    {
1243
     "data": {
1244
      "text/html": [
1245
       "<div>\n",
1246
       "<style scoped>\n",
1247
       "    .dataframe tbody tr th:only-of-type {\n",
1248
       "        vertical-align: middle;\n",
1249
       "    }\n",
1250
       "\n",
1251
       "    .dataframe tbody tr th {\n",
1252
       "        vertical-align: top;\n",
1253
       "    }\n",
1254
       "\n",
1255
       "    .dataframe thead th {\n",
1256
       "        text-align: right;\n",
1257
       "    }\n",
1258
       "</style>\n",
1259
       "<table border=\"1\" class=\"dataframe\">\n",
1260
       "  <thead>\n",
1261
       "    <tr style=\"text-align: right;\">\n",
1262
       "      <th></th>\n",
1263
       "      <th>city</th>\n",
1264
       "      <th>sales</th>\n",
1265
       "    </tr>\n",
1266
       "    <tr>\n",
1267
       "      <th>region</th>\n",
1268
       "      <th></th>\n",
1269
       "      <th></th>\n",
1270
       "    </tr>\n",
1271
       "  </thead>\n",
1272
       "  <tbody>\n",
1273
       "    <tr>\n",
1274
       "      <th>ДВФО</th>\n",
1275
       "      <td>5</td>\n",
1276
       "      <td>5</td>\n",
1277
       "    </tr>\n",
1278
       "    <tr>\n",
1279
       "      <th>СФО</th>\n",
1280
       "      <td>4</td>\n",
1281
       "      <td>3</td>\n",
1282
       "    </tr>\n",
1283
       "  </tbody>\n",
1284
       "</table>\n",
1285
       "</div>"
1286
      ],
1287
      "text/plain": [
1288
       "        city  sales\n",
1289
       "region             \n",
1290
       "ДВФО       5      5\n",
1291
       "СФО        4      3"
1292
      ]
1293
     },
1294
     "execution_count": 41,
1295
     "metadata": {},
1296
     "output_type": "execute_result"
1297
    }
1298
   ],
1299
   "source": [
1300
    "df.groupby('region').count()"
1301
   ]
1302
  },
1303
  {
1304
   "cell_type": "code",
1305
   "execution_count": 45,
1306
   "metadata": {},
1307
   "outputs": [
1308
    {
1309
     "data": {
1310
      "text/html": [
1311
       "<div>\n",
1312
       "<style scoped>\n",
1313
       "    .dataframe tbody tr th:only-of-type {\n",
1314
       "        vertical-align: middle;\n",
1315
       "    }\n",
1316
       "\n",
1317
       "    .dataframe tbody tr th {\n",
1318
       "        vertical-align: top;\n",
1319
       "    }\n",
1320
       "\n",
1321
       "    .dataframe thead tr th {\n",
1322
       "        text-align: left;\n",
1323
       "    }\n",
1324
       "\n",
1325
       "    .dataframe thead tr:last-of-type th {\n",
1326
       "        text-align: right;\n",
1327
       "    }\n",
1328
       "</style>\n",
1329
       "<table border=\"1\" class=\"dataframe\">\n",
1330
       "  <thead>\n",
1331
       "    <tr>\n",
1332
       "      <th></th>\n",
1333
       "      <th>sales</th>\n",
1334
       "      <th colspan=\"2\" halign=\"left\">city</th>\n",
1335
       "    </tr>\n",
1336
       "    <tr>\n",
1337
       "      <th></th>\n",
1338
       "      <th>count</th>\n",
1339
       "      <th>count</th>\n",
1340
       "      <th>min</th>\n",
1341
       "    </tr>\n",
1342
       "    <tr>\n",
1343
       "      <th>region</th>\n",
1344
       "      <th></th>\n",
1345
       "      <th></th>\n",
1346
       "      <th></th>\n",
1347
       "    </tr>\n",
1348
       "  </thead>\n",
1349
       "  <tbody>\n",
1350
       "    <tr>\n",
1351
       "      <th>ДВФО</th>\n",
1352
       "      <td>5</td>\n",
1353
       "      <td>5</td>\n",
1354
       "      <td>Владивосток</td>\n",
1355
       "    </tr>\n",
1356
       "    <tr>\n",
1357
       "      <th>СФО</th>\n",
1358
       "      <td>3</td>\n",
1359
       "      <td>4</td>\n",
1360
       "      <td>Новосибирск</td>\n",
1361
       "    </tr>\n",
1362
       "  </tbody>\n",
1363
       "</table>\n",
1364
       "</div>"
1365
      ],
1366
      "text/plain": [
1367
       "       sales  city             \n",
1368
       "       count count          min\n",
1369
       "region                         \n",
1370
       "ДВФО       5     5  Владивосток\n",
1371
       "СФО        3     4  Новосибирск"
1372
      ]
1373
     },
1374
     "execution_count": 45,
1375
     "metadata": {},
1376
     "output_type": "execute_result"
1377
    }
1378
   ],
1379
   "source": [
1380
    "df.groupby('region').agg({'sales': 'count', 'city': ['count', 'min']})"
1381
   ]
1382
  },
1383
  {
1384
   "cell_type": "code",
1385
   "execution_count": 49,
1386
   "metadata": {
1387
    "scrolled": true
1388
   },
1389
   "outputs": [
1390
    {
1391
     "ename": "KeyError",
1392
     "evalue": "'sales'",
1393
     "output_type": "error",
1394
     "traceback": [
1395
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1396
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
1397
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3620\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3621\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3622\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1398
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
1399
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
1400
      "\u001b[0;32mpandas/_libs/index_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.index.Int64Engine._check_type\u001b[0;34m()\u001b[0m\n",
1401
      "\u001b[0;31mKeyError\u001b[0m: 'sales'",
1402
      "\nThe above exception was the direct cause of the following exception:\n",
1403
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
1404
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func, engine, engine_kwargs, *args, **kwargs)\u001b[0m\n\u001b[1;32m    286\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 287\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_agg_general\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    288\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1405
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m_python_agg_general\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1480\u001b[0m                 \u001b[0;31m# if this function is invalid for this dtype, we will ignore it.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1481\u001b[0;31m                 \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1482\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1406
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/ops.py\u001b[0m in \u001b[0;36magg_series\u001b[0;34m(self, obj, func, preserve_dtype)\u001b[0m\n\u001b[1;32m    980\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 981\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate_series_pure_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    982\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1407
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/ops.py\u001b[0m in \u001b[0;36m_aggregate_series_pure_python\u001b[0;34m(self, obj, func)\u001b[0m\n\u001b[1;32m   1004\u001b[0m             \u001b[0mgroup\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"groupby\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1005\u001b[0;31m             \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1006\u001b[0m             \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibreduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1408
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m   1466\u001b[0m         \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_builtin_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1467\u001b[0;31m         \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1468\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1409
      "\u001b[0;32m<ipython-input-28-fed892ac6ed2>\u001b[0m in \u001b[0;36mmin_max\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmin_max\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mmin_sales\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sales'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0mmax_sales\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sales'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1410
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    957\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mkey_is_scalar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 958\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    959\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1411
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m   1068\u001b[0m         \u001b[0;31m# Similar to Index.get_value, but we do not fall back to positional\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1069\u001b[0;31m         \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1070\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values_for_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1412
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3622\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3623\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3624\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1413
      "\u001b[0;31mKeyError\u001b[0m: 'sales'",
1414
      "\nDuring handling of the above exception, another exception occurred:\n",
1415
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
1416
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3620\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3621\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3622\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1417
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
1418
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
1419
      "\u001b[0;32mpandas/_libs/index_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.index.Int64Engine._check_type\u001b[0;34m()\u001b[0m\n",
1420
      "\u001b[0;31mKeyError\u001b[0m: 'sales'",
1421
      "\nThe above exception was the direct cause of the following exception:\n",
1422
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
1423
      "\u001b[0;32m<ipython-input-49-a1d8d4084517>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'region'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'sales'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'count'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_max\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'city'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'count'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'min'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1424
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func, engine, engine_kwargs, *args, **kwargs)\u001b[0m\n\u001b[1;32m    867\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    868\u001b[0m         \u001b[0mop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGroupByApply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 869\u001b[0;31m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    870\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_dict_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    871\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1425
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36magg\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    166\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    167\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_dict_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 168\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg_dict_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    169\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mis_list_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    170\u001b[0m             \u001b[0;31m# we require a list, but not a 'str'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1426
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36magg_dict_like\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    473\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    474\u001b[0m             \u001b[0;31m# key used for column selection and output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 475\u001b[0;31m             results = {\n\u001b[0m\u001b[1;32m    476\u001b[0m                 \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_gotitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mndim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhow\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    477\u001b[0m             }\n",
1427
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    474\u001b[0m             \u001b[0;31m# key used for column selection and output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    475\u001b[0m             results = {\n\u001b[0;32m--> 476\u001b[0;31m                 \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_gotitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mndim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhow\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    477\u001b[0m             }\n\u001b[1;32m    478\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1428
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func, engine, engine_kwargs, *args, **kwargs)\u001b[0m\n\u001b[1;32m    269\u001b[0m             \u001b[0;31m# but not the class list / tuple itself.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    270\u001b[0m             \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmaybe_mangle_lambdas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 271\u001b[0;31m             \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate_multiple_funcs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    272\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mrelabeling\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    273\u001b[0m                 \u001b[0;31m# error: Incompatible types in assignment (expression has type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1429
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36m_aggregate_multiple_funcs\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    324\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    325\u001b[0m             \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbase\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOutputKey\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposition\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m             \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maggregate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    328\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1430
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func, engine, engine_kwargs, *args, **kwargs)\u001b[0m\n\u001b[1;32m    289\u001b[0m                 \u001b[0;31m# TODO: KeyError is raised in _python_agg_general,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    290\u001b[0m                 \u001b[0;31m#  see test_groupby.test_basic\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 291\u001b[0;31m                 \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate_named\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    293\u001b[0m                 \u001b[0;31m# result is a dict whose keys are the elements of result_index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1431
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36m_aggregate_named\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m    413\u001b[0m             \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"name\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    414\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 415\u001b[0;31m             \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    416\u001b[0m             \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibreduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    417\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minitialized\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1432
      "\u001b[0;32m<ipython-input-28-fed892ac6ed2>\u001b[0m in \u001b[0;36mmin_max\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmin_max\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mmin_sales\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sales'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0mmax_sales\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sales'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mmax_sales\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mmin_sales\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1433
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    956\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    957\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mkey_is_scalar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 958\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    959\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    960\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_hashable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1434
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m   1067\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1068\u001b[0m         \u001b[0;31m# Similar to Index.get_value, but we do not fall back to positional\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1069\u001b[0;31m         \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1070\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values_for_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1071\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1435
      "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3621\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3622\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3623\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3624\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3625\u001b[0m                 \u001b[0;31m# If we have a listlike key, _check_indexing_error will raise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1436
      "\u001b[0;31mKeyError\u001b[0m: 'sales'"
1437
     ]
1438
    }
1439
   ],
1440
   "source": [
1441
    "df.groupby('region').agg({'sales': ['count', min_max], 'city': ['count', 'min']})"
1442
   ]
1443
  },
1444
  {
1445
   "cell_type": "code",
1446
   "execution_count": null,
1447
   "metadata": {},
1448
   "outputs": [],
1449
   "source": []
1450
  },
1451
  {
1452
   "cell_type": "markdown",
1453
   "metadata": {},
1454
   "source": [
1455
    "Функция min_max_agg для метода agg: в отличие от min_max, она принимает не весь датафрейм, а отдельный столбец"
1456
   ]
1457
  },
1458
  {
1459
   "cell_type": "code",
1460
   "execution_count": 50,
1461
   "metadata": {},
1462
   "outputs": [],
1463
   "source": [
1464
    "def min_max_agg(df_column):\n",
1465
    "    # receives a single column rather than the whole frame, so no ['sales'] lookup here\n",
1466
    "    lowest, highest = df_column.min(), df_column.max()\n",
1467
    "    \n",
1468
    "    return highest - lowest"
1469
   ]
1470
  },
1471
  {
1472
   "cell_type": "code",
1473
   "execution_count": 52,
1474
   "metadata": {},
1475
   "outputs": [
1476
    {
1477
     "data": {
1478
      "text/html": [
1479
       "<div>\n",
1480
       "<style scoped>\n",
1481
       "    .dataframe tbody tr th:only-of-type {\n",
1482
       "        vertical-align: middle;\n",
1483
       "    }\n",
1484
       "\n",
1485
       "    .dataframe tbody tr th {\n",
1486
       "        vertical-align: top;\n",
1487
       "    }\n",
1488
       "\n",
1489
       "    .dataframe thead tr th {\n",
1490
       "        text-align: left;\n",
1491
       "    }\n",
1492
       "\n",
1493
       "    .dataframe thead tr:last-of-type th {\n",
1494
       "        text-align: right;\n",
1495
       "    }\n",
1496
       "</style>\n",
1497
       "<table border=\"1\" class=\"dataframe\">\n",
1498
       "  <thead>\n",
1499
       "    <tr>\n",
1500
       "      <th></th>\n",
1501
       "      <th colspan=\"2\" halign=\"left\">sales</th>\n",
1502
       "      <th colspan=\"2\" halign=\"left\">city</th>\n",
1503
       "    </tr>\n",
1504
       "    <tr>\n",
1505
       "      <th></th>\n",
1506
       "      <th>count</th>\n",
1507
       "      <th>min_max_agg</th>\n",
1508
       "      <th>count</th>\n",
1509
       "      <th>min</th>\n",
1510
       "    </tr>\n",
1511
       "    <tr>\n",
1512
       "      <th>region</th>\n",
1513
       "      <th></th>\n",
1514
       "      <th></th>\n",
1515
       "      <th></th>\n",
1516
       "      <th></th>\n",
1517
       "    </tr>\n",
1518
       "  </thead>\n",
1519
       "  <tbody>\n",
1520
       "    <tr>\n",
1521
       "      <th>ДВФО</th>\n",
1522
       "      <td>5</td>\n",
1523
       "      <td>250.0</td>\n",
1524
       "      <td>5</td>\n",
1525
       "      <td>Владивосток</td>\n",
1526
       "    </tr>\n",
1527
       "    <tr>\n",
1528
       "      <th>СФО</th>\n",
1529
       "      <td>3</td>\n",
1530
       "      <td>190.0</td>\n",
1531
       "      <td>4</td>\n",
1532
       "      <td>Новосибирск</td>\n",
1533
       "    </tr>\n",
1534
       "  </tbody>\n",
1535
       "</table>\n",
1536
       "</div>"
1537
      ],
1538
      "text/plain": [
1539
       "       sales              city             \n",
1540
       "       count min_max_agg count          min\n",
1541
       "region                                     \n",
1542
       "ДВФО       5       250.0     5  Владивосток\n",
1543
       "СФО        3       190.0     4  Новосибирск"
1544
      ]
1545
     },
1546
     "execution_count": 52,
1547
     "metadata": {},
1548
     "output_type": "execute_result"
1549
    }
1550
   ],
1551
   "source": [
1552
    "df.groupby('region').agg({'sales': ['count', min_max_agg], 'city': ['count', 'min']})"
1553
   ]
1554
  },
1555
  {
1556
   "cell_type": "code",
1557
   "execution_count": null,
1558
   "metadata": {},
1559
   "outputs": [],
1560
   "source": []
1561
  },
1562
  {
1563
   "cell_type": "markdown",
1564
   "metadata": {},
1565
   "source": [
1566
    "### Посчитать долю продаж каждой строки в продажах её региона"
1567
   ]
1568
  },
1569
  {
1570
   "cell_type": "code",
1571
   "execution_count": 55,
1572
   "metadata": {},
1573
   "outputs": [
1574
    {
1575
     "data": {
1576
      "text/html": [
1577
       "<div>\n",
1578
       "<style scoped>\n",
1579
       "    .dataframe tbody tr th:only-of-type {\n",
1580
       "        vertical-align: middle;\n",
1581
       "    }\n",
1582
       "\n",
1583
       "    .dataframe tbody tr th {\n",
1584
       "        vertical-align: top;\n",
1585
       "    }\n",
1586
       "\n",
1587
       "    .dataframe thead th {\n",
1588
       "        text-align: right;\n",
1589
       "    }\n",
1590
       "</style>\n",
1591
       "<table border=\"1\" class=\"dataframe\">\n",
1592
       "  <thead>\n",
1593
       "    <tr style=\"text-align: right;\">\n",
1594
       "      <th></th>\n",
1595
       "      <th>region</th>\n",
1596
       "      <th>city</th>\n",
1597
       "      <th>sales</th>\n",
1598
       "    </tr>\n",
1599
       "  </thead>\n",
1600
       "  <tbody>\n",
1601
       "    <tr>\n",
1602
       "      <th>2</th>\n",
1603
       "      <td>ДВФО</td>\n",
1604
       "      <td>Владивосток</td>\n",
1605
       "      <td>290</td>\n",
1606
       "    </tr>\n",
1607
       "    <tr>\n",
1608
       "      <th>4</th>\n",
1609
       "      <td>ДВФО</td>\n",
1610
       "      <td>Владивосток</td>\n",
1611
       "      <td>100</td>\n",
1612
       "    </tr>\n",
1613
       "    <tr>\n",
1614
       "      <th>5</th>\n",
1615
       "      <td>ДВФО</td>\n",
1616
       "      <td>Владивосток</td>\n",
1617
       "      <td>50</td>\n",
1618
       "    </tr>\n",
1619
       "    <tr>\n",
1620
       "      <th>6</th>\n",
1621
       "      <td>ДВФО</td>\n",
1622
       "      <td>Владивосток</td>\n",
1623
       "      <td>40</td>\n",
1624
       "    </tr>\n",
1625
       "    <tr>\n",
1626
       "      <th>7</th>\n",
1627
       "      <td>ДВФО</td>\n",
1628
       "      <td>Владивосток</td>\n",
1629
       "      <td>120</td>\n",
1630
       "    </tr>\n",
1631
       "    <tr>\n",
1632
       "      <th>9</th>\n",
1633
       "      <td>ДВФО</td>\n",
1634
       "      <td>Владивосток</td>\n",
1635
       "      <td>200</td>\n",
1636
       "    </tr>\n",
1637
       "    <tr>\n",
1638
       "      <th>0</th>\n",
1639
       "      <td>СФО</td>\n",
1640
       "      <td>Новосибирск</td>\n",
1641
       "      <td>140</td>\n",
1642
       "    </tr>\n",
1643
       "    <tr>\n",
1644
       "      <th>3</th>\n",
1645
       "      <td>СФО</td>\n",
1646
       "      <td>Новосибирск</td>\n",
1647
       "      <td>95</td>\n",
1648
       "    </tr>\n",
1649
       "    <tr>\n",
1650
       "      <th>1</th>\n",
1651
       "      <td>СФО</td>\n",
1652
       "      <td>Томск</td>\n",
1653
       "      <td>135</td>\n",
1654
       "    </tr>\n",
1655
       "    <tr>\n",
1656
       "      <th>8</th>\n",
1657
       "      <td>СФО</td>\n",
1658
       "      <td>Томск</td>\n",
1659
       "      <td>190</td>\n",
1660
       "    </tr>\n",
1661
       "  </tbody>\n",
1662
       "</table>\n",
1663
       "</div>"
1664
      ],
1665
      "text/plain": [
1666
       "  region         city  sales\n",
1667
       "2   ДВФО  Владивосток    290\n",
1668
       "4   ДВФО  Владивосток    100\n",
1669
       "5   ДВФО  Владивосток     50\n",
1670
       "6   ДВФО  Владивосток     40\n",
1671
       "7   ДВФО  Владивосток    120\n",
1672
       "9   ДВФО  Владивосток    200\n",
1673
       "0    СФО  Новосибирск    140\n",
1674
       "3    СФО  Новосибирск     95\n",
1675
       "1    СФО        Томск    135\n",
1676
       "8    СФО        Томск    190"
1677
      ]
1678
     },
1679
     "execution_count": 55,
1680
     "metadata": {},
1681
     "output_type": "execute_result"
1682
    }
1683
   ],
1684
   "source": [
1685
    "df = pd.DataFrame(\n",
1686
    "    {\n",
1687
    "        'region': ['СФО', 'СФО', 'ДВФО', 'СФО', 'ДВФО', 'ДВФО', 'ДВФО', 'ДВФО', 'СФО', 'ДВФО'],\n",
1688
    "        'city': ['Новосибирск', 'Томск', 'Владивосток', 'Новосибирск', 'Владивосток', 'Владивосток', 'Владивосток', 'Владивосток', 'Томск', 'Владивосток'],\n",
1689
    "        'sales': [140, 135, 290, 95, 100, 50, 40, 120, 190, 200],\n",
1690
    "    }\n",
1691
    ")\n",
1692
    "df.sort_values(['region', 'city'])"
1693
   ]
1694
  },
1695
  {
1696
   "cell_type": "code",
1697
   "execution_count": null,
1698
   "metadata": {},
1699
   "outputs": [],
1700
   "source": [
1701
    "a = [1, 2, 3]\n",
1702
    "b = a.copy()"
1703
   ]
1704
  },
1705
  {
1706
   "cell_type": "code",
1707
   "execution_count": 59,
1708
   "metadata": {},
1709
   "outputs": [
1710
    {
1711
     "data": {
1712
      "text/html": [
1713
       "<div>\n",
1714
       "<style scoped>\n",
1715
       "    .dataframe tbody tr th:only-of-type {\n",
1716
       "        vertical-align: middle;\n",
1717
       "    }\n",
1718
       "\n",
1719
       "    .dataframe tbody tr th {\n",
1720
       "        vertical-align: top;\n",
1721
       "    }\n",
1722
       "\n",
1723
       "    .dataframe thead th {\n",
1724
       "        text-align: right;\n",
1725
       "    }\n",
1726
       "</style>\n",
1727
       "<table border=\"1\" class=\"dataframe\">\n",
1728
       "  <thead>\n",
1729
       "    <tr style=\"text-align: right;\">\n",
1730
       "      <th></th>\n",
1731
       "      <th>region</th>\n",
1732
       "      <th>city</th>\n",
1733
       "      <th>sales</th>\n",
1734
       "    </tr>\n",
1735
       "  </thead>\n",
1736
       "  <tbody>\n",
1737
       "    <tr>\n",
1738
       "      <th>2</th>\n",
1739
       "      <td>ДВФО</td>\n",
1740
       "      <td>Владивосток</td>\n",
1741
       "      <td>290</td>\n",
1742
       "    </tr>\n",
1743
       "    <tr>\n",
1744
       "      <th>4</th>\n",
1745
       "      <td>ДВФО</td>\n",
1746
       "      <td>Владивосток</td>\n",
1747
       "      <td>100</td>\n",
1748
       "    </tr>\n",
1749
       "    <tr>\n",
1750
       "      <th>5</th>\n",
1751
       "      <td>ДВФО</td>\n",
1752
       "      <td>Владивосток</td>\n",
1753
       "      <td>50</td>\n",
1754
       "    </tr>\n",
1755
       "    <tr>\n",
1756
       "      <th>6</th>\n",
1757
       "      <td>ДВФО</td>\n",
1758
       "      <td>Владивосток</td>\n",
1759
       "      <td>40</td>\n",
1760
       "    </tr>\n",
1761
       "    <tr>\n",
1762
       "      <th>7</th>\n",
1763
       "      <td>ДВФО</td>\n",
1764
       "      <td>Владивосток</td>\n",
1765
       "      <td>120</td>\n",
1766
       "    </tr>\n",
1767
       "    <tr>\n",
1768
       "      <th>9</th>\n",
1769
       "      <td>ДВФО</td>\n",
1770
       "      <td>Владивосток</td>\n",
1771
       "      <td>200</td>\n",
1772
       "    </tr>\n",
1773
       "  </tbody>\n",
1774
       "</table>\n",
1775
       "</div>"
1776
      ],
1777
      "text/plain": [
1778
       "  region         city  sales\n",
1779
       "2   ДВФО  Владивосток    290\n",
1780
       "4   ДВФО  Владивосток    100\n",
1781
       "5   ДВФО  Владивосток     50\n",
1782
       "6   ДВФО  Владивосток     40\n",
1783
       "7   ДВФО  Владивосток    120\n",
1784
       "9   ДВФО  Владивосток    200"
1785
      ]
1786
     },
1787
     "execution_count": 59,
1788
     "metadata": {},
1789
     "output_type": "execute_result"
1790
    }
1791
   ],
1792
   "source": [
1793
    "# filter first, then copy: df_dv becomes an independent frame that is safe to modify\n",
1794
    "df_dv = df[df['region'] == 'ДВФО'].copy()\n",
1795
    "\n",
1796
    "df_dv"
1797
   ]
1798
  },
1799
  {
1800
   "cell_type": "code",
1801
   "execution_count": 60,
1802
   "metadata": {},
1803
   "outputs": [
1804
    {
1805
     "data": {
1806
      "text/plain": [
1807
       "800"
1808
      ]
1809
     },
1810
     "execution_count": 60,
1811
     "metadata": {},
1812
     "output_type": "execute_result"
1813
    }
1814
   ],
1815
   "source": [
1816
    "sum_sales = df_dv['sales'].sum()\n",
1817
    "sum_sales"
1818
   ]
1819
  },
1820
  {
1821
   "cell_type": "code",
1822
   "execution_count": 61,
1823
   "metadata": {},
1824
   "outputs": [
1825
    {
1826
     "data": {
1827
      "text/html": [
1828
       "<div>\n",
1829
       "<style scoped>\n",
1830
       "    .dataframe tbody tr th:only-of-type {\n",
1831
       "        vertical-align: middle;\n",
1832
       "    }\n",
1833
       "\n",
1834
       "    .dataframe tbody tr th {\n",
1835
       "        vertical-align: top;\n",
1836
       "    }\n",
1837
       "\n",
1838
       "    .dataframe thead th {\n",
1839
       "        text-align: right;\n",
1840
       "    }\n",
1841
       "</style>\n",
1842
       "<table border=\"1\" class=\"dataframe\">\n",
1843
       "  <thead>\n",
1844
       "    <tr style=\"text-align: right;\">\n",
1845
       "      <th></th>\n",
1846
       "      <th>region</th>\n",
1847
       "      <th>city</th>\n",
1848
       "      <th>sales</th>\n",
1849
       "      <th>share</th>\n",
1850
       "    </tr>\n",
1851
       "  </thead>\n",
1852
       "  <tbody>\n",
1853
       "    <tr>\n",
1854
       "      <th>2</th>\n",
1855
       "      <td>ДВФО</td>\n",
1856
       "      <td>Владивосток</td>\n",
1857
       "      <td>290</td>\n",
1858
       "      <td>0.3625</td>\n",
1859
       "    </tr>\n",
1860
       "    <tr>\n",
1861
       "      <th>4</th>\n",
1862
       "      <td>ДВФО</td>\n",
1863
       "      <td>Владивосток</td>\n",
1864
       "      <td>100</td>\n",
1865
       "      <td>0.1250</td>\n",
1866
       "    </tr>\n",
1867
       "    <tr>\n",
1868
       "      <th>5</th>\n",
1869
       "      <td>ДВФО</td>\n",
1870
       "      <td>Владивосток</td>\n",
1871
       "      <td>50</td>\n",
1872
       "      <td>0.0625</td>\n",
1873
       "    </tr>\n",
1874
       "    <tr>\n",
1875
       "      <th>6</th>\n",
1876
       "      <td>ДВФО</td>\n",
1877
       "      <td>Владивосток</td>\n",
1878
       "      <td>40</td>\n",
1879
       "      <td>0.0500</td>\n",
1880
       "    </tr>\n",
1881
       "    <tr>\n",
1882
       "      <th>7</th>\n",
1883
       "      <td>ДВФО</td>\n",
1884
       "      <td>Владивосток</td>\n",
1885
       "      <td>120</td>\n",
1886
       "      <td>0.1500</td>\n",
1887
       "    </tr>\n",
1888
       "    <tr>\n",
1889
       "      <th>9</th>\n",
1890
       "      <td>ДВФО</td>\n",
1891
       "      <td>Владивосток</td>\n",
1892
       "      <td>200</td>\n",
1893
       "      <td>0.2500</td>\n",
1894
       "    </tr>\n",
1895
       "  </tbody>\n",
1896
       "</table>\n",
1897
       "</div>"
1898
      ],
1899
      "text/plain": [
1900
       "  region         city  sales   share\n",
1901
       "2   ДВФО  Владивосток    290  0.3625\n",
1902
       "4   ДВФО  Владивосток    100  0.1250\n",
1903
       "5   ДВФО  Владивосток     50  0.0625\n",
1904
       "6   ДВФО  Владивосток     40  0.0500\n",
1905
       "7   ДВФО  Владивосток    120  0.1500\n",
1906
       "9   ДВФО  Владивосток    200  0.2500"
1907
      ]
1908
     },
1909
     "execution_count": 61,
1910
     "metadata": {},
1911
     "output_type": "execute_result"
1912
    }
1913
   ],
1914
   "source": [
1915
    "df_dv['share'] = df_dv['sales'] / sum_sales\n",
1916
    "df_dv"
1917
   ]
1918
  },
1919
  {
1920
   "cell_type": "code",
1921
   "execution_count": 62,
1922
   "metadata": {},
1923
   "outputs": [],
1924
   "source": [
1925
    "def share(df):\n",
1926
    "    # build a new frame via assign: the group passed in by groupby.apply must not be mutated\n",
1927
    "    sum_sales = df['sales'].sum()\n",
1928
    "    \n",
1929
    "    return df.assign(share=df['sales'] / sum_sales)"
1930
   ]
1931
  },
1932
  {
1933
   "cell_type": "code",
1934
   "execution_count": 63,
1935
   "metadata": {},
1936
   "outputs": [
1937
    {
1938
     "data": {
1939
      "text/html": [
1940
       "<div>\n",
1941
       "<style scoped>\n",
1942
       "    .dataframe tbody tr th:only-of-type {\n",
1943
       "        vertical-align: middle;\n",
1944
       "    }\n",
1945
       "\n",
1946
       "    .dataframe tbody tr th {\n",
1947
       "        vertical-align: top;\n",
1948
       "    }\n",
1949
       "\n",
1950
       "    .dataframe thead th {\n",
1951
       "        text-align: right;\n",
1952
       "    }\n",
1953
       "</style>\n",
1954
       "<table border=\"1\" class=\"dataframe\">\n",
1955
       "  <thead>\n",
1956
       "    <tr style=\"text-align: right;\">\n",
1957
       "      <th></th>\n",
1958
       "      <th>region</th>\n",
1959
       "      <th>city</th>\n",
1960
       "      <th>sales</th>\n",
1961
       "      <th>share</th>\n",
1962
       "    </tr>\n",
1963
       "  </thead>\n",
1964
       "  <tbody>\n",
1965
       "    <tr>\n",
1966
       "      <th>2</th>\n",
1967
       "      <td>ДВФО</td>\n",
1968
       "      <td>Владивосток</td>\n",
1969
       "      <td>290</td>\n",
1970
       "      <td>0.362500</td>\n",
1971
       "    </tr>\n",
1972
       "    <tr>\n",
1973
       "      <th>4</th>\n",
1974
       "      <td>ДВФО</td>\n",
1975
       "      <td>Владивосток</td>\n",
1976
       "      <td>100</td>\n",
1977
       "      <td>0.125000</td>\n",
1978
       "    </tr>\n",
1979
       "    <tr>\n",
1980
       "      <th>5</th>\n",
1981
       "      <td>ДВФО</td>\n",
1982
       "      <td>Владивосток</td>\n",
1983
       "      <td>50</td>\n",
1984
       "      <td>0.062500</td>\n",
1985
       "    </tr>\n",
1986
       "    <tr>\n",
1987
       "      <th>6</th>\n",
1988
       "      <td>ДВФО</td>\n",
1989
       "      <td>Владивосток</td>\n",
1990
       "      <td>40</td>\n",
1991
       "      <td>0.050000</td>\n",
1992
       "    </tr>\n",
1993
       "    <tr>\n",
1994
       "      <th>7</th>\n",
1995
       "      <td>ДВФО</td>\n",
1996
       "      <td>Владивосток</td>\n",
1997
       "      <td>120</td>\n",
1998
       "      <td>0.150000</td>\n",
1999
       "    </tr>\n",
2000
       "    <tr>\n",
2001
       "      <th>9</th>\n",
2002
       "      <td>ДВФО</td>\n",
2003
       "      <td>Владивосток</td>\n",
2004
       "      <td>200</td>\n",
2005
       "      <td>0.250000</td>\n",
2006
       "    </tr>\n",
2007
       "    <tr>\n",
2008
       "      <th>0</th>\n",
2009
       "      <td>СФО</td>\n",
2010
       "      <td>Новосибирск</td>\n",
2011
       "      <td>140</td>\n",
2012
       "      <td>0.250000</td>\n",
2013
       "    </tr>\n",
2014
       "    <tr>\n",
2015
       "      <th>3</th>\n",
2016
       "      <td>СФО</td>\n",
2017
       "      <td>Новосибирск</td>\n",
2018
       "      <td>95</td>\n",
2019
       "      <td>0.169643</td>\n",
2020
       "    </tr>\n",
2021
       "    <tr>\n",
2022
       "      <th>1</th>\n",
2023
       "      <td>СФО</td>\n",
2024
       "      <td>Томск</td>\n",
2025
       "      <td>135</td>\n",
2026
       "      <td>0.241071</td>\n",
2027
       "    </tr>\n",
2028
       "    <tr>\n",
2029
       "      <th>8</th>\n",
2030
       "      <td>СФО</td>\n",
2031
       "      <td>Томск</td>\n",
2032
       "      <td>190</td>\n",
2033
       "      <td>0.339286</td>\n",
2034
       "    </tr>\n",
2035
       "  </tbody>\n",
2036
       "</table>\n",
2037
       "</div>"
2038
      ],
2039
      "text/plain": [
2040
       "  region         city  sales     share\n",
2041
       "2   ДВФО  Владивосток    290  0.362500\n",
2042
       "4   ДВФО  Владивосток    100  0.125000\n",
2043
       "5   ДВФО  Владивосток     50  0.062500\n",
2044
       "6   ДВФО  Владивосток     40  0.050000\n",
2045
       "7   ДВФО  Владивосток    120  0.150000\n",
2046
       "9   ДВФО  Владивосток    200  0.250000\n",
2047
       "0    СФО  Новосибирск    140  0.250000\n",
2048
       "3    СФО  Новосибирск     95  0.169643\n",
2049
       "1    СФО        Томск    135  0.241071\n",
2050
       "8    СФО        Томск    190  0.339286"
2051
      ]
2052
     },
2053
     "execution_count": 63,
2054
     "metadata": {},
2055
     "output_type": "execute_result"
2056
    }
2057
   ],
2058
   "source": [
2059
    "df.groupby('region').apply(share).sort_values(['region', 'city'])"
2060
   ]
2061
  },
2062
  {
2063
   "cell_type": "code",
2064
   "execution_count": 64,
2065
   "metadata": {},
2066
   "outputs": [
2067
    {
2068
     "data": {
2069
      "text/html": [
2070
       "<div>\n",
2071
       "<style scoped>\n",
2072
       "    .dataframe tbody tr th:only-of-type {\n",
2073
       "        vertical-align: middle;\n",
2074
       "    }\n",
2075
       "\n",
2076
       "    .dataframe tbody tr th {\n",
2077
       "        vertical-align: top;\n",
2078
       "    }\n",
2079
       "\n",
2080
       "    .dataframe thead th {\n",
2081
       "        text-align: right;\n",
2082
       "    }\n",
2083
       "</style>\n",
2084
       "<table border=\"1\" class=\"dataframe\">\n",
2085
       "  <thead>\n",
2086
       "    <tr style=\"text-align: right;\">\n",
2087
       "      <th></th>\n",
2088
       "      <th>region</th>\n",
2089
       "      <th>city</th>\n",
2090
       "      <th>sales</th>\n",
2091
       "      <th>share</th>\n",
2092
       "    </tr>\n",
2093
       "  </thead>\n",
2094
       "  <tbody>\n",
2095
       "    <tr>\n",
2096
       "      <th>2</th>\n",
2097
       "      <td>ДВФО</td>\n",
2098
       "      <td>Владивосток</td>\n",
2099
       "      <td>290</td>\n",
2100
       "      <td>0.362500</td>\n",
2101
       "    </tr>\n",
2102
       "    <tr>\n",
2103
       "      <th>4</th>\n",
2104
       "      <td>ДВФО</td>\n",
2105
       "      <td>Владивосток</td>\n",
2106
       "      <td>100</td>\n",
2107
       "      <td>0.125000</td>\n",
2108
       "    </tr>\n",
2109
       "    <tr>\n",
2110
       "      <th>5</th>\n",
2111
       "      <td>ДВФО</td>\n",
2112
       "      <td>Владивосток</td>\n",
2113
       "      <td>50</td>\n",
2114
       "      <td>0.062500</td>\n",
2115
       "    </tr>\n",
2116
       "    <tr>\n",
2117
       "      <th>6</th>\n",
2118
       "      <td>ДВФО</td>\n",
2119
       "      <td>Владивосток</td>\n",
2120
       "      <td>40</td>\n",
2121
       "      <td>0.050000</td>\n",
2122
       "    </tr>\n",
2123
       "    <tr>\n",
2124
       "      <th>7</th>\n",
2125
       "      <td>ДВФО</td>\n",
2126
       "      <td>Владивосток</td>\n",
2127
       "      <td>120</td>\n",
2128
       "      <td>0.150000</td>\n",
2129
       "    </tr>\n",
2130
       "    <tr>\n",
2131
       "      <th>9</th>\n",
2132
       "      <td>ДВФО</td>\n",
2133
       "      <td>Владивосток</td>\n",
2134
       "      <td>200</td>\n",
2135
       "      <td>0.250000</td>\n",
2136
       "    </tr>\n",
2137
       "    <tr>\n",
2138
       "      <th>0</th>\n",
2139
       "      <td>СФО</td>\n",
2140
       "      <td>Новосибирск</td>\n",
2141
       "      <td>140</td>\n",
2142
       "      <td>0.595745</td>\n",
2143
       "    </tr>\n",
2144
       "    <tr>\n",
2145
       "      <th>3</th>\n",
2146
       "      <td>СФО</td>\n",
2147
       "      <td>Новосибирск</td>\n",
2148
       "      <td>95</td>\n",
2149
       "      <td>0.404255</td>\n",
2150
       "    </tr>\n",
2151
       "    <tr>\n",
2152
       "      <th>1</th>\n",
2153
       "      <td>СФО</td>\n",
2154
       "      <td>Томск</td>\n",
2155
       "      <td>135</td>\n",
2156
       "      <td>0.415385</td>\n",
2157
       "    </tr>\n",
2158
       "    <tr>\n",
2159
       "      <th>8</th>\n",
2160
       "      <td>СФО</td>\n",
2161
       "      <td>Томск</td>\n",
2162
       "      <td>190</td>\n",
2163
       "      <td>0.584615</td>\n",
2164
       "    </tr>\n",
2165
       "  </tbody>\n",
2166
       "</table>\n",
2167
       "</div>"
2168
      ],
2169
      "text/plain": [
2170
       "  region         city  sales     share\n",
2171
       "2   ДВФО  Владивосток    290  0.362500\n",
2172
       "4   ДВФО  Владивосток    100  0.125000\n",
2173
       "5   ДВФО  Владивосток     50  0.062500\n",
2174
       "6   ДВФО  Владивосток     40  0.050000\n",
2175
       "7   ДВФО  Владивосток    120  0.150000\n",
2176
       "9   ДВФО  Владивосток    200  0.250000\n",
2177
       "0    СФО  Новосибирск    140  0.595745\n",
2178
       "3    СФО  Новосибирск     95  0.404255\n",
2179
       "1    СФО        Томск    135  0.415385\n",
2180
       "8    СФО        Томск    190  0.584615"
2181
      ]
2182
     },
2183
     "execution_count": 64,
2184
     "metadata": {},
2185
     "output_type": "execute_result"
2186
    }
2187
   ],
2188
   "source": [
2189
    "df.groupby(['region', 'city']).apply(share).sort_values(['region', 'city'])"
2190
   ]
2191
  }
2192
 ],
2193
 "metadata": {
2194
  "kernelspec": {
2195
   "display_name": "Python 3 (ipykernel)",
2196
   "language": "python",
2197
   "name": "python3"
2198
  },
2199
  "language_info": {
2200
   "codemirror_mode": {
2201
    "name": "ipython",
2202
    "version": 3
2203
   },
2204
   "file_extension": ".py",
2205
   "mimetype": "text/x-python",
2206
   "name": "python",
2207
   "nbconvert_exporter": "python",
2208
   "pygments_lexer": "ipython3",
2209
   "version": "3.9.13"
2210
  }
2211
 },
2212
 "nbformat": 4,
2213
 "nbformat_minor": 4
2214
}
2215

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.