atlas_of_asteroids

Форк
0
/
2_merge_TNO_diameter_data.ipynb 
1009 строк · 32.5 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {},
7
   "outputs": [
8
    {
9
     "name": "stdout",
10
     "output_type": "stream",
11
     "text": [
12
      "numpy     1.15.4\n",
13
      "pandas    0.23.4\n",
14
      "watermark 1.8.1\n",
15
      "ELEANOR LUTZ 2019-06-03 \n",
16
      "\n",
17
      "CPython 3.7.1\n",
18
      "IPython 7.2.0\n",
19
      "\n",
20
      "compiler   : MSC v.1900 64 bit (AMD64)\n",
21
      "system     : Windows\n",
22
      "release    : 10\n",
23
      "machine    : AMD64\n",
24
      "processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
25
      "CPU cores  : 12\n",
26
      "interpreter: 64bit\n"
27
     ]
28
    }
29
   ],
30
   "source": [
31
    "# Watermark is not required for this code, but is included for information.\n",
    "# Packages are named explicitly with -p instead of using -iv: -iv only reports\n",
    "# modules that are already imported into the kernel, and numpy / pandas are\n",
    "# imported in a later cell, so -iv would omit them on a fresh Restart & Run All.\n",
    "import watermark\n",
    "%load_ext watermark\n",
    "%watermark -a \"ELEANOR LUTZ\" -d -v -m -p numpy,pandas,watermark"
35
   ]
36
  },
37
  {
38
   "cell_type": "code",
39
   "execution_count": null,
40
   "metadata": {},
41
   "outputs": [],
42
   "source": [
43
    "import numpy as np\n",
44
    "import pandas as pd\n",
45
    "import os.path"
46
   ]
47
  },
48
  {
49
   "cell_type": "code",
50
   "execution_count": null,
51
   "metadata": {},
52
   "outputs": [
53
    {
54
     "data": {
55
      "text/html": [
56
       "<div>\n",
57
       "<style scoped>\n",
58
       "    .dataframe tbody tr th:only-of-type {\n",
59
       "        vertical-align: middle;\n",
60
       "    }\n",
61
       "\n",
62
       "    .dataframe tbody tr th {\n",
63
       "        vertical-align: top;\n",
64
       "    }\n",
65
       "\n",
66
       "    .dataframe thead th {\n",
67
       "        text-align: right;\n",
68
       "    }\n",
69
       "</style>\n",
70
       "<table border=\"1\" class=\"dataframe\">\n",
71
       "  <thead>\n",
72
       "    <tr style=\"text-align: right;\">\n",
73
       "      <th></th>\n",
74
       "      <th>0</th>\n",
75
       "    </tr>\n",
76
       "  </thead>\n",
77
       "  <tbody>\n",
78
       "    <tr>\n",
79
       "      <th>0</th>\n",
80
       "      <td>2060  Chiron          1977 UB       13.7  0...</td>\n",
81
       "    </tr>\n",
82
       "    <tr>\n",
83
       "      <th>1</th>\n",
84
       "      <td>2060  Chiron          1977 UB       13.7  0...</td>\n",
85
       "    </tr>\n",
86
       "    <tr>\n",
87
       "      <th>2</th>\n",
88
       "      <td>2060  Chiron          1977 UB       13.7  0...</td>\n",
89
       "    </tr>\n",
90
       "    <tr>\n",
91
       "      <th>3</th>\n",
92
       "      <td>2060  Chiron          1977 UB       13.7  0...</td>\n",
93
       "    </tr>\n",
94
       "    <tr>\n",
95
       "      <th>4</th>\n",
96
       "      <td>2060  Chiron          1977 UB       13.7  0...</td>\n",
97
       "    </tr>\n",
98
       "  </tbody>\n",
99
       "</table>\n",
100
       "</div>"
101
      ],
102
      "text/plain": [
103
       "                                                   0\n",
104
       "0     2060  Chiron          1977 UB       13.7  0...\n",
105
       "1     2060  Chiron          1977 UB       13.7  0...\n",
106
       "2     2060  Chiron          1977 UB       13.7  0...\n",
107
       "3     2060  Chiron          1977 UB       13.7  0...\n",
108
       "4     2060  Chiron          1977 UB       13.7  0..."
109
      ]
110
     },
111
     "metadata": {},
112
     "output_type": "display_data"
113
    },
114
    {
115
     "data": {
116
      "text/html": [
117
       "<div>\n",
118
       "<style scoped>\n",
119
       "    .dataframe tbody tr th:only-of-type {\n",
120
       "        vertical-align: middle;\n",
121
       "    }\n",
122
       "\n",
123
       "    .dataframe tbody tr th {\n",
124
       "        vertical-align: top;\n",
125
       "    }\n",
126
       "\n",
127
       "    .dataframe thead th {\n",
128
       "        text-align: right;\n",
129
       "    }\n",
130
       "</style>\n",
131
       "<table border=\"1\" class=\"dataframe\">\n",
132
       "  <thead>\n",
133
       "    <tr style=\"text-align: right;\">\n",
134
       "      <th></th>\n",
135
       "      <th>number</th>\n",
136
       "      <th>name</th>\n",
137
       "      <th>full_name</th>\n",
138
       "      <th>axis_AU</th>\n",
139
       "      <th>eccentricity</th>\n",
140
       "      <th>inclination</th>\n",
141
       "      <th>type</th>\n",
142
       "      <th>companions</th>\n",
143
       "      <th>magnitude</th>\n",
144
       "      <th>abs_magnitude</th>\n",
145
       "      <th>...</th>\n",
146
       "      <th>albedo_err2</th>\n",
147
       "      <th>albedo_notecode</th>\n",
148
       "      <th>albedo_colorcode</th>\n",
149
       "      <th>density</th>\n",
150
       "      <th>density_err1</th>\n",
151
       "      <th>density_err2</th>\n",
152
       "      <th>density_notecode</th>\n",
153
       "      <th>methods_da</th>\n",
154
       "      <th>methods_density</th>\n",
155
       "      <th>reference</th>\n",
156
       "    </tr>\n",
157
       "  </thead>\n",
158
       "  <tbody>\n",
159
       "    <tr>\n",
160
       "      <th>0</th>\n",
161
       "      <td>2060</td>\n",
162
       "      <td>Chiron</td>\n",
163
       "      <td>1977UB</td>\n",
164
       "      <td>13.7</td>\n",
165
       "      <td>0.38</td>\n",
166
       "      <td>7.</td>\n",
167
       "      <td>CEN</td>\n",
168
       "      <td>0</td>\n",
169
       "      <td>5.8</td>\n",
170
       "      <td>6.900</td>\n",
171
       "      <td>...</td>\n",
172
       "      <td>0.035</td>\n",
173
       "      <td>*</td>\n",
174
       "      <td>-</td>\n",
175
       "      <td>-9.999</td>\n",
176
       "      <td>-9.999</td>\n",
177
       "      <td>-9.999</td>\n",
178
       "      <td>-</td>\n",
179
       "      <td>T</td>\n",
180
       "      <td>-</td>\n",
181
       "      <td>L84a</td>\n",
182
       "    </tr>\n",
183
       "    <tr>\n",
184
       "      <th>1</th>\n",
185
       "      <td>2060</td>\n",
186
       "      <td>Chiron</td>\n",
187
       "      <td>1977UB</td>\n",
188
       "      <td>13.7</td>\n",
189
       "      <td>0.38</td>\n",
190
       "      <td>7.</td>\n",
191
       "      <td>CEN</td>\n",
192
       "      <td>0</td>\n",
193
       "      <td>5.8</td>\n",
194
       "      <td>-99.999</td>\n",
195
       "      <td>...</td>\n",
196
       "      <td>-9.999</td>\n",
197
       "      <td>-</td>\n",
198
       "      <td>G</td>\n",
199
       "      <td>-9.999</td>\n",
200
       "      <td>-9.999</td>\n",
201
       "      <td>-9.999</td>\n",
202
       "      <td>-</td>\n",
203
       "      <td>T</td>\n",
204
       "      <td>-</td>\n",
205
       "      <td>S91a</td>\n",
206
       "    </tr>\n",
207
       "    <tr>\n",
208
       "      <th>2</th>\n",
209
       "      <td>2060</td>\n",
210
       "      <td>Chiron</td>\n",
211
       "      <td>1977UB</td>\n",
212
       "      <td>13.7</td>\n",
213
       "      <td>0.38</td>\n",
214
       "      <td>7.</td>\n",
215
       "      <td>CEN</td>\n",
216
       "      <td>0</td>\n",
217
       "      <td>5.8</td>\n",
218
       "      <td>6.800</td>\n",
219
       "      <td>...</td>\n",
220
       "      <td>-9.999</td>\n",
221
       "      <td>*</td>\n",
222
       "      <td>G</td>\n",
223
       "      <td>-9.999</td>\n",
224
       "      <td>-9.999</td>\n",
225
       "      <td>-9.999</td>\n",
226
       "      <td>-</td>\n",
227
       "      <td>T</td>\n",
228
       "      <td>-</td>\n",
229
       "      <td>J92a</td>\n",
230
       "    </tr>\n",
231
       "    <tr>\n",
232
       "      <th>3</th>\n",
233
       "      <td>2060</td>\n",
234
       "      <td>Chiron</td>\n",
235
       "      <td>1977UB</td>\n",
236
       "      <td>13.7</td>\n",
237
       "      <td>0.38</td>\n",
238
       "      <td>7.</td>\n",
239
       "      <td>CEN</td>\n",
240
       "      <td>0</td>\n",
241
       "      <td>5.8</td>\n",
242
       "      <td>-99.999</td>\n",
243
       "      <td>...</td>\n",
244
       "      <td>-9.999</td>\n",
245
       "      <td>-</td>\n",
246
       "      <td>-</td>\n",
247
       "      <td>-9.999</td>\n",
248
       "      <td>-9.999</td>\n",
249
       "      <td>-9.999</td>\n",
250
       "      <td>-</td>\n",
251
       "      <td>O</td>\n",
252
       "      <td>-</td>\n",
253
       "      <td>B93a</td>\n",
254
       "    </tr>\n",
255
       "    <tr>\n",
256
       "      <th>4</th>\n",
257
       "      <td>2060</td>\n",
258
       "      <td>Chiron</td>\n",
259
       "      <td>1977UB</td>\n",
260
       "      <td>13.7</td>\n",
261
       "      <td>0.38</td>\n",
262
       "      <td>7.</td>\n",
263
       "      <td>CEN</td>\n",
264
       "      <td>0</td>\n",
265
       "      <td>5.8</td>\n",
266
       "      <td>6.900</td>\n",
267
       "      <td>...</td>\n",
268
       "      <td>0.030</td>\n",
269
       "      <td>*</td>\n",
270
       "      <td>-</td>\n",
271
       "      <td>-9.999</td>\n",
272
       "      <td>-9.999</td>\n",
273
       "      <td>-9.999</td>\n",
274
       "      <td>-</td>\n",
275
       "      <td>T</td>\n",
276
       "      <td>-</td>\n",
277
       "      <td>C94a</td>\n",
278
       "    </tr>\n",
279
       "  </tbody>\n",
280
       "</table>\n",
281
       "<p>5 rows × 35 columns</p>\n",
282
       "</div>"
283
      ],
284
      "text/plain": [
285
       "  number    name full_name axis_AU eccentricity inclination type companions  \\\n",
286
       "0   2060  Chiron    1977UB    13.7         0.38          7.  CEN          0   \n",
287
       "1   2060  Chiron    1977UB    13.7         0.38          7.  CEN          0   \n",
288
       "2   2060  Chiron    1977UB    13.7         0.38          7.  CEN          0   \n",
289
       "3   2060  Chiron    1977UB    13.7         0.38          7.  CEN          0   \n",
290
       "4   2060  Chiron    1977UB    13.7         0.38          7.  CEN          0   \n",
291
       "\n",
292
       "  magnitude abs_magnitude    ...    albedo_err2 albedo_notecode  \\\n",
293
       "0       5.8         6.900    ...          0.035               *   \n",
294
       "1       5.8       -99.999    ...         -9.999               -   \n",
295
       "2       5.8         6.800    ...         -9.999               *   \n",
296
       "3       5.8       -99.999    ...         -9.999               -   \n",
297
       "4       5.8         6.900    ...          0.030               *   \n",
298
       "\n",
299
       "  albedo_colorcode density density_err1 density_err2 density_notecode  \\\n",
300
       "0                -  -9.999       -9.999       -9.999                -   \n",
301
       "1                G  -9.999       -9.999       -9.999                -   \n",
302
       "2                G  -9.999       -9.999       -9.999                -   \n",
303
       "3                -  -9.999       -9.999       -9.999                -   \n",
304
       "4                -  -9.999       -9.999       -9.999                -   \n",
305
       "\n",
306
       "  methods_da methods_density reference  \n",
307
       "0          T               -      L84a  \n",
308
       "1          T               -      S91a  \n",
309
       "2          T               -      J92a  \n",
310
       "3          O               -      B93a  \n",
311
       "4          T               -      C94a  \n",
312
       "\n",
313
       "[5 rows x 35 columns]"
314
      ]
315
     },
316
     "metadata": {},
317
     "output_type": "display_data"
318
    },
319
    {
320
     "data": {
321
      "text/html": [
322
       "<div>\n",
323
       "<style scoped>\n",
324
       "    .dataframe tbody tr th:only-of-type {\n",
325
       "        vertical-align: middle;\n",
326
       "    }\n",
327
       "\n",
328
       "    .dataframe tbody tr th {\n",
329
       "        vertical-align: top;\n",
330
       "    }\n",
331
       "\n",
332
       "    .dataframe thead th {\n",
333
       "        text-align: right;\n",
334
       "    }\n",
335
       "</style>\n",
336
       "<table border=\"1\" class=\"dataframe\">\n",
337
       "  <thead>\n",
338
       "    <tr style=\"text-align: right;\">\n",
339
       "      <th></th>\n",
340
       "      <th>number</th>\n",
341
       "      <th>name</th>\n",
342
       "      <th>full_name</th>\n",
343
       "      <th>type</th>\n",
344
       "      <th>diameter_km</th>\n",
345
       "    </tr>\n",
346
       "  </thead>\n",
347
       "  <tbody>\n",
348
       "    <tr>\n",
349
       "      <th>0</th>\n",
350
       "      <td>0</td>\n",
351
       "      <td></td>\n",
352
       "      <td>1996TS66</td>\n",
353
       "      <td>CUB</td>\n",
354
       "      <td>163.00</td>\n",
355
       "    </tr>\n",
356
       "    <tr>\n",
357
       "      <th>1</th>\n",
358
       "      <td>0</td>\n",
359
       "      <td></td>\n",
360
       "      <td>1998WW31</td>\n",
361
       "      <td>CUB</td>\n",
362
       "      <td>170.30</td>\n",
363
       "    </tr>\n",
364
       "    <tr>\n",
365
       "      <th>2</th>\n",
366
       "      <td>0</td>\n",
367
       "      <td></td>\n",
368
       "      <td>1999OJ4</td>\n",
369
       "      <td>CUB</td>\n",
370
       "      <td>103.95</td>\n",
371
       "    </tr>\n",
372
       "    <tr>\n",
373
       "      <th>3</th>\n",
374
       "      <td>0</td>\n",
375
       "      <td></td>\n",
376
       "      <td>2000CF105</td>\n",
377
       "      <td>CUB</td>\n",
378
       "      <td>78.30</td>\n",
379
       "    </tr>\n",
380
       "    <tr>\n",
381
       "      <th>4</th>\n",
382
       "      <td>0</td>\n",
383
       "      <td></td>\n",
384
       "      <td>2000GM137</td>\n",
385
       "      <td>CEN</td>\n",
386
       "      <td>8.80</td>\n",
387
       "    </tr>\n",
388
       "  </tbody>\n",
389
       "</table>\n",
390
       "</div>"
391
      ],
392
      "text/plain": [
393
       "  number name  full_name type  diameter_km\n",
394
       "0      0        1996TS66  CUB       163.00\n",
395
       "1      0        1998WW31  CUB       170.30\n",
396
       "2      0         1999OJ4  CUB       103.95\n",
397
       "3      0       2000CF105  CUB        78.30\n",
398
       "4      0       2000GM137  CEN         8.80"
399
      ]
400
     },
401
     "metadata": {},
402
     "output_type": "display_data"
403
    },
404
    {
405
     "name": "stdout",
406
     "output_type": "stream",
407
     "text": [
408
      "179 unique asteroids\n"
409
     ]
410
    }
411
   ],
412
   "source": [
413
    "# Column names are assigned by hand from the bundle_description.txt file.\n",
    "colnames = ['number', 'name', 'full_name', 'axis_AU', 'eccentricity', 'inclination',\n",
    "            'type', 'companions', 'magnitude', 'abs_magnitude', 'abs_magnitude_err',\n",
    "            'diameter_km', 'e_diameter_err1', 'e_diameter_err2', 'e_diameter_notecode',\n",
    "            'p_diameter', 'p_diameter_err1', 'p_diameter_err2', 'p_diameter_notecode',\n",
    "            'c_diameter', 'c_diameter_err1', 'c_diameter_err2', 'c_diameter_notecode',\n",
    "            'albedo', 'albedo_err1', 'albedo_err2', 'albedo_notecode', 'albedo_colorcode',\n",
    "            'density', 'density_err1', 'density_err2', 'density_notecode',\n",
    "            'methods_da', 'methods_density', 'reference'\n",
    "           ]\n",
    "id_cols = ['number', 'name', 'full_name', 'type']\n",
    "\n",
    "# Every record is read as one string in column 0 and tokenized below.\n",
    "diams_raw = pd.read_csv('./data/diameters/tno-centaur_diam-albedo-density/data/tno_centaur_diam_alb_dens.tab',\n",
    "                        header=None, sep='\\t', encoding='latin-1')\n",
    "display(diams_raw.head())\n",
    "\n",
    "# Split on whitespace. The designation spans two tokens (e.g. '1977 UB'), so\n",
    "# tokens 2 and 3 are joined into one field before the names are applied.\n",
    "diams_split = diams_raw[0].str.split(expand=True)\n",
    "diams_split[2] = diams_split[2] + diams_split[3]\n",
    "diams_split = diams_split.drop([3], axis=1)\n",
    "diams_split.columns = colnames\n",
    "display(diams_split.head())\n",
    "\n",
    "# Keep the identifiers and the two diameter columns as an independent frame.\n",
    "diams = diams_split[id_cols + ['diameter_km', 'p_diameter']].copy()\n",
    "for col in ['diameter_km', 'p_diameter']:\n",
    "    diams[col] = diams[col].astype(float)\n",
    "    # Anything that is not strictly positive is treated as missing.\n",
    "    diams.loc[~(diams[col] > 0), col] = np.nan\n",
    "\n",
    "# Use diameter_km where present, otherwise fall back to p_diameter, and\n",
    "# discard records that have neither.\n",
    "diams['diameter_km'] = diams['diameter_km'].fillna(diams['p_diameter'])\n",
    "diams = diams[diams['diameter_km'].notna()]\n",
    "\n",
    "# Collapse repeated records of one object to the median diameter.\n",
    "diams = diams.groupby(id_cols)['diameter_km'].median().reset_index()\n",
    "\n",
    "# Hyphens are stripped from the name column (presumably '-' marks an unnamed\n",
    "# object - TODO confirm against bundle_description.txt).\n",
    "diams['name'] = diams['name'].str.replace('-', '')\n",
    "diams.loc[diams['name'] == 'Pluto', 'full_name'] = '134340Pluto'\n",
    "\n",
    "display(diams.head())\n",
    "savename = './data/diameters/TNO_Centaurs.csv'\n",
    "# An existing file is never overwritten; delete it to regenerate the table.\n",
    "if not os.path.isfile(savename):\n",
    "    diams.to_csv(savename, index=False)\n",
    "\n",
    "print(len(diams), 'unique asteroids')"
456
   ]
457
  },
458
  {
459
   "cell_type": "code",
460
   "execution_count": null,
461
   "metadata": {},
462
   "outputs": [
463
    {
464
     "data": {
465
      "text/html": [
466
       "<div>\n",
467
       "<style scoped>\n",
468
       "    .dataframe tbody tr th:only-of-type {\n",
469
       "        vertical-align: middle;\n",
470
       "    }\n",
471
       "\n",
472
       "    .dataframe tbody tr th {\n",
473
       "        vertical-align: top;\n",
474
       "    }\n",
475
       "\n",
476
       "    .dataframe thead th {\n",
477
       "        text-align: right;\n",
478
       "    }\n",
479
       "</style>\n",
480
       "<table border=\"1\" class=\"dataframe\">\n",
481
       "  <thead>\n",
482
       "    <tr style=\"text-align: right;\">\n",
483
       "      <th></th>\n",
484
       "      <th>id</th>\n",
485
       "      <th>spkid</th>\n",
486
       "      <th>full_name</th>\n",
487
       "      <th>pdes</th>\n",
488
       "      <th>name</th>\n",
489
       "      <th>neo</th>\n",
490
       "      <th>pha</th>\n",
491
       "      <th>diameter</th>\n",
492
       "      <th>prefix</th>\n",
493
       "      <th>q</th>\n",
494
       "      <th>per</th>\n",
495
       "      <th>class</th>\n",
496
       "    </tr>\n",
497
       "  </thead>\n",
498
       "  <tbody>\n",
499
       "    <tr>\n",
500
       "      <th>794557</th>\n",
501
       "      <td>bT3S3411</td>\n",
502
       "      <td>3246664</td>\n",
503
       "      <td>3411T3</td>\n",
504
       "      <td>3411 T-3</td>\n",
505
       "      <td>NaN</td>\n",
506
       "      <td>N</td>\n",
507
       "      <td>N</td>\n",
508
       "      <td>NaN</td>\n",
509
       "      <td>NaN</td>\n",
510
       "      <td>2.073221</td>\n",
511
       "      <td>1428.185418</td>\n",
512
       "      <td>MBA</td>\n",
513
       "    </tr>\n",
514
       "    <tr>\n",
515
       "      <th>794558</th>\n",
516
       "      <td>bT3S3521</td>\n",
517
       "      <td>3246672</td>\n",
518
       "      <td>3521T3</td>\n",
519
       "      <td>3521 T-3</td>\n",
520
       "      <td>NaN</td>\n",
521
       "      <td>N</td>\n",
522
       "      <td>N</td>\n",
523
       "      <td>NaN</td>\n",
524
       "      <td>NaN</td>\n",
525
       "      <td>1.546423</td>\n",
526
       "      <td>1386.029296</td>\n",
527
       "      <td>MCA</td>\n",
528
       "    </tr>\n",
529
       "    <tr>\n",
530
       "      <th>794559</th>\n",
531
       "      <td>bT3S4571</td>\n",
532
       "      <td>3248621</td>\n",
533
       "      <td>4571T3</td>\n",
534
       "      <td>4571 T-3</td>\n",
535
       "      <td>NaN</td>\n",
536
       "      <td>N</td>\n",
537
       "      <td>N</td>\n",
538
       "      <td>NaN</td>\n",
539
       "      <td>NaN</td>\n",
540
       "      <td>1.813901</td>\n",
541
       "      <td>1484.222588</td>\n",
542
       "      <td>MBA</td>\n",
543
       "    </tr>\n",
544
       "    <tr>\n",
545
       "      <th>794560</th>\n",
546
       "      <td>bT3S4658</td>\n",
547
       "      <td>3248624</td>\n",
548
       "      <td>4658T3</td>\n",
549
       "      <td>4658 T-3</td>\n",
550
       "      <td>NaN</td>\n",
551
       "      <td>N</td>\n",
552
       "      <td>N</td>\n",
553
       "      <td>NaN</td>\n",
554
       "      <td>NaN</td>\n",
555
       "      <td>1.718022</td>\n",
556
       "      <td>1138.438109</td>\n",
557
       "      <td>MBA</td>\n",
558
       "    </tr>\n",
559
       "    <tr>\n",
560
       "      <th>794561</th>\n",
561
       "      <td>bT3S5154</td>\n",
562
       "      <td>3248651</td>\n",
563
       "      <td>5154T3</td>\n",
564
       "      <td>5154 T-3</td>\n",
565
       "      <td>NaN</td>\n",
566
       "      <td>N</td>\n",
567
       "      <td>N</td>\n",
568
       "      <td>NaN</td>\n",
569
       "      <td>NaN</td>\n",
570
       "      <td>2.239464</td>\n",
571
       "      <td>1985.374056</td>\n",
572
       "      <td>MBA</td>\n",
573
       "    </tr>\n",
574
       "  </tbody>\n",
575
       "</table>\n",
576
       "</div>"
577
      ],
578
      "text/plain": [
579
       "              id    spkid full_name      pdes name neo pha diameter prefix  \\\n",
580
       "794557  bT3S3411  3246664    3411T3  3411 T-3  NaN   N   N      NaN    NaN   \n",
581
       "794558  bT3S3521  3246672    3521T3  3521 T-3  NaN   N   N      NaN    NaN   \n",
582
       "794559  bT3S4571  3248621    4571T3  4571 T-3  NaN   N   N      NaN    NaN   \n",
583
       "794560  bT3S4658  3248624    4658T3  4658 T-3  NaN   N   N      NaN    NaN   \n",
584
       "794561  bT3S5154  3248651    5154T3  5154 T-3  NaN   N   N      NaN    NaN   \n",
585
       "\n",
586
       "               q          per class  \n",
587
       "794557  2.073221  1428.185418   MBA  \n",
588
       "794558  1.546423  1386.029296   MCA  \n",
589
       "794559  1.813901  1484.222588   MBA  \n",
590
       "794560  1.718022  1138.438109   MBA  \n",
591
       "794561  2.239464  1985.374056   MBA  "
592
      ]
593
     },
594
     "metadata": {},
595
     "output_type": "display_data"
596
    },
597
    {
598
     "data": {
599
      "text/html": [
600
       "<div>\n",
601
       "<style scoped>\n",
602
       "    .dataframe tbody tr th:only-of-type {\n",
603
       "        vertical-align: middle;\n",
604
       "    }\n",
605
       "\n",
606
       "    .dataframe tbody tr th {\n",
607
       "        vertical-align: top;\n",
608
       "    }\n",
609
       "\n",
610
       "    .dataframe thead th {\n",
611
       "        text-align: right;\n",
612
       "    }\n",
613
       "</style>\n",
614
       "<table border=\"1\" class=\"dataframe\">\n",
615
       "  <thead>\n",
616
       "    <tr style=\"text-align: right;\">\n",
617
       "      <th></th>\n",
618
       "      <th>id</th>\n",
619
       "      <th>spkid</th>\n",
620
       "      <th>full_name</th>\n",
621
       "      <th>pdes</th>\n",
622
       "      <th>name</th>\n",
623
       "      <th>neo</th>\n",
624
       "      <th>pha</th>\n",
625
       "      <th>diameter</th>\n",
626
       "      <th>prefix</th>\n",
627
       "      <th>q</th>\n",
628
       "      <th>per</th>\n",
629
       "      <th>class</th>\n",
630
       "      <th>diameter_km</th>\n",
631
       "    </tr>\n",
632
       "  </thead>\n",
633
       "  <tbody>\n",
634
       "    <tr>\n",
635
       "      <th>794557</th>\n",
636
       "      <td>bT3S3411</td>\n",
637
       "      <td>3246664</td>\n",
638
       "      <td>3411T3</td>\n",
639
       "      <td>3411 T-3</td>\n",
640
       "      <td>NaN</td>\n",
641
       "      <td>N</td>\n",
642
       "      <td>N</td>\n",
643
       "      <td>NaN</td>\n",
644
       "      <td>NaN</td>\n",
645
       "      <td>2.073221</td>\n",
646
       "      <td>1428.185418</td>\n",
647
       "      <td>MBA</td>\n",
648
       "      <td>NaN</td>\n",
649
       "    </tr>\n",
650
       "    <tr>\n",
651
       "      <th>794558</th>\n",
652
       "      <td>bT3S3521</td>\n",
653
       "      <td>3246672</td>\n",
654
       "      <td>3521T3</td>\n",
655
       "      <td>3521 T-3</td>\n",
656
       "      <td>NaN</td>\n",
657
       "      <td>N</td>\n",
658
       "      <td>N</td>\n",
659
       "      <td>NaN</td>\n",
660
       "      <td>NaN</td>\n",
661
       "      <td>1.546423</td>\n",
662
       "      <td>1386.029296</td>\n",
663
       "      <td>MCA</td>\n",
664
       "      <td>NaN</td>\n",
665
       "    </tr>\n",
666
       "    <tr>\n",
667
       "      <th>794559</th>\n",
668
       "      <td>bT3S4571</td>\n",
669
       "      <td>3248621</td>\n",
670
       "      <td>4571T3</td>\n",
671
       "      <td>4571 T-3</td>\n",
672
       "      <td>NaN</td>\n",
673
       "      <td>N</td>\n",
674
       "      <td>N</td>\n",
675
       "      <td>NaN</td>\n",
676
       "      <td>NaN</td>\n",
677
       "      <td>1.813901</td>\n",
678
       "      <td>1484.222588</td>\n",
679
       "      <td>MBA</td>\n",
680
       "      <td>NaN</td>\n",
681
       "    </tr>\n",
682
       "    <tr>\n",
683
       "      <th>794560</th>\n",
684
       "      <td>bT3S4658</td>\n",
685
       "      <td>3248624</td>\n",
686
       "      <td>4658T3</td>\n",
687
       "      <td>4658 T-3</td>\n",
688
       "      <td>NaN</td>\n",
689
       "      <td>N</td>\n",
690
       "      <td>N</td>\n",
691
       "      <td>NaN</td>\n",
692
       "      <td>NaN</td>\n",
693
       "      <td>1.718022</td>\n",
694
       "      <td>1138.438109</td>\n",
695
       "      <td>MBA</td>\n",
696
       "      <td>NaN</td>\n",
697
       "    </tr>\n",
698
       "    <tr>\n",
699
       "      <th>794561</th>\n",
700
       "      <td>bT3S5154</td>\n",
701
       "      <td>3248651</td>\n",
702
       "      <td>5154T3</td>\n",
703
       "      <td>5154 T-3</td>\n",
704
       "      <td>NaN</td>\n",
705
       "      <td>N</td>\n",
706
       "      <td>N</td>\n",
707
       "      <td>NaN</td>\n",
708
       "      <td>NaN</td>\n",
709
       "      <td>2.239464</td>\n",
710
       "      <td>1985.374056</td>\n",
711
       "      <td>MBA</td>\n",
712
       "      <td>NaN</td>\n",
713
       "    </tr>\n",
714
       "  </tbody>\n",
715
       "</table>\n",
716
       "</div>"
717
      ],
718
      "text/plain": [
719
       "              id    spkid full_name      pdes name neo pha diameter prefix  \\\n",
720
       "794557  bT3S3411  3246664    3411T3  3411 T-3  NaN   N   N      NaN    NaN   \n",
721
       "794558  bT3S3521  3246672    3521T3  3521 T-3  NaN   N   N      NaN    NaN   \n",
722
       "794559  bT3S4571  3248621    4571T3  4571 T-3  NaN   N   N      NaN    NaN   \n",
723
       "794560  bT3S4658  3248624    4658T3  4658 T-3  NaN   N   N      NaN    NaN   \n",
724
       "794561  bT3S5154  3248651    5154T3  5154 T-3  NaN   N   N      NaN    NaN   \n",
725
       "\n",
726
       "               q          per class  diameter_km  \n",
727
       "794557  2.073221  1428.185418   MBA          NaN  \n",
728
       "794558  1.546423  1386.029296   MCA          NaN  \n",
729
       "794559  1.813901  1484.222588   MBA          NaN  \n",
730
       "794560  1.718022  1138.438109   MBA          NaN  \n",
731
       "794561  2.239464  1985.374056   MBA          NaN  "
732
      ]
733
     },
734
     "metadata": {},
735
     "output_type": "display_data"
736
    },
737
    {
738
     "name": "stdout",
739
     "output_type": "stream",
740
     "text": [
741
      "44 diameter values that can be joined\n",
742
      "35 new diameter values added\n"
743
     ]
744
    },
745
    {
746
     "data": {
747
      "text/html": [
748
       "<div>\n",
749
       "<style scoped>\n",
750
       "    .dataframe tbody tr th:only-of-type {\n",
751
       "        vertical-align: middle;\n",
752
       "    }\n",
753
       "\n",
754
       "    .dataframe tbody tr th {\n",
755
       "        vertical-align: top;\n",
756
       "    }\n",
757
       "\n",
758
       "    .dataframe thead th {\n",
759
       "        text-align: right;\n",
760
       "    }\n",
761
       "</style>\n",
762
       "<table border=\"1\" class=\"dataframe\">\n",
763
       "  <thead>\n",
764
       "    <tr style=\"text-align: right;\">\n",
765
       "      <th></th>\n",
766
       "      <th>id</th>\n",
767
       "      <th>spkid</th>\n",
768
       "      <th>full_name</th>\n",
769
       "      <th>pdes</th>\n",
770
       "      <th>name</th>\n",
771
       "      <th>neo</th>\n",
772
       "      <th>pha</th>\n",
773
       "      <th>diameter</th>\n",
774
       "      <th>prefix</th>\n",
775
       "      <th>q</th>\n",
776
       "      <th>per</th>\n",
777
       "      <th>class</th>\n",
778
       "    </tr>\n",
779
       "  </thead>\n",
780
       "  <tbody>\n",
781
       "    <tr>\n",
782
       "      <th>794557</th>\n",
783
       "      <td>bT3S3411</td>\n",
784
       "      <td>3246664</td>\n",
785
       "      <td>3411T3</td>\n",
786
       "      <td>3411 T-3</td>\n",
787
       "      <td>NaN</td>\n",
788
       "      <td>N</td>\n",
789
       "      <td>N</td>\n",
790
       "      <td>NaN</td>\n",
791
       "      <td>NaN</td>\n",
792
       "      <td>2.073221</td>\n",
793
       "      <td>1428.185418</td>\n",
794
       "      <td>MBA</td>\n",
795
       "    </tr>\n",
796
       "    <tr>\n",
797
       "      <th>794558</th>\n",
798
       "      <td>bT3S3521</td>\n",
799
       "      <td>3246672</td>\n",
800
       "      <td>3521T3</td>\n",
801
       "      <td>3521 T-3</td>\n",
802
       "      <td>NaN</td>\n",
803
       "      <td>N</td>\n",
804
       "      <td>N</td>\n",
805
       "      <td>NaN</td>\n",
806
       "      <td>NaN</td>\n",
807
       "      <td>1.546423</td>\n",
808
       "      <td>1386.029296</td>\n",
809
       "      <td>MCA</td>\n",
810
       "    </tr>\n",
811
       "    <tr>\n",
812
       "      <th>794559</th>\n",
813
       "      <td>bT3S4571</td>\n",
814
       "      <td>3248621</td>\n",
815
       "      <td>4571T3</td>\n",
816
       "      <td>4571 T-3</td>\n",
817
       "      <td>NaN</td>\n",
818
       "      <td>N</td>\n",
819
       "      <td>N</td>\n",
820
       "      <td>NaN</td>\n",
821
       "      <td>NaN</td>\n",
822
       "      <td>1.813901</td>\n",
823
       "      <td>1484.222588</td>\n",
824
       "      <td>MBA</td>\n",
825
       "    </tr>\n",
826
       "    <tr>\n",
827
       "      <th>794560</th>\n",
828
       "      <td>bT3S4658</td>\n",
829
       "      <td>3248624</td>\n",
830
       "      <td>4658T3</td>\n",
831
       "      <td>4658 T-3</td>\n",
832
       "      <td>NaN</td>\n",
833
       "      <td>N</td>\n",
834
       "      <td>N</td>\n",
835
       "      <td>NaN</td>\n",
836
       "      <td>NaN</td>\n",
837
       "      <td>1.718022</td>\n",
838
       "      <td>1138.438109</td>\n",
839
       "      <td>MBA</td>\n",
840
       "    </tr>\n",
841
       "    <tr>\n",
842
       "      <th>794561</th>\n",
843
       "      <td>bT3S5154</td>\n",
844
       "      <td>3248651</td>\n",
845
       "      <td>5154T3</td>\n",
846
       "      <td>5154 T-3</td>\n",
847
       "      <td>NaN</td>\n",
848
       "      <td>N</td>\n",
849
       "      <td>N</td>\n",
850
       "      <td>NaN</td>\n",
851
       "      <td>NaN</td>\n",
852
       "      <td>2.239464</td>\n",
853
       "      <td>1985.374056</td>\n",
854
       "      <td>MBA</td>\n",
855
       "    </tr>\n",
856
       "  </tbody>\n",
857
       "</table>\n",
858
       "</div>"
859
      ],
860
      "text/plain": [
861
       "              id    spkid full_name      pdes name neo pha diameter prefix  \\\n",
862
       "794557  bT3S3411  3246664    3411T3  3411 T-3  NaN   N   N      NaN    NaN   \n",
863
       "794558  bT3S3521  3246672    3521T3  3521 T-3  NaN   N   N      NaN    NaN   \n",
864
       "794559  bT3S4571  3248621    4571T3  4571 T-3  NaN   N   N      NaN    NaN   \n",
865
       "794560  bT3S4658  3248624    4658T3  4658 T-3  NaN   N   N      NaN    NaN   \n",
866
       "794561  bT3S5154  3248651    5154T3  5154 T-3  NaN   N   N      NaN    NaN   \n",
867
       "\n",
868
       "               q          per class  \n",
869
       "794557  2.073221  1428.185418   MBA  \n",
870
       "794558  1.546423  1386.029296   MCA  \n",
871
       "794559  1.813901  1484.222588   MBA  \n",
872
       "794560  1.718022  1138.438109   MBA  \n",
873
       "794561  2.239464  1985.374056   MBA  "
874
      ]
875
     },
876
     "metadata": {},
877
     "output_type": "display_data"
878
    }
879
   ],
880
   "source": [
881
    "# Load the full asteroid table and build the join key: full_name with every\n",
    "# space, parenthesis and hyphen removed. A single character class with an\n",
    "# explicit regex=True is used because '(' and ')' are regex metacharacters and\n",
    "# the default of the regex argument differs between pandas versions.\n",
    "df = pd.read_csv('./data/all_asteroids.csv', low_memory=False)\n",
    "df['full_name'] = df['full_name'].str.replace('[ ()-]', '', regex=True)\n",
    "display(df.tail())\n",
    "\n",
    "# Left-join the TNO/Centaur diameter file on full_name so that every asteroid\n",
    "# row is kept; the diameter file's number, name and type columns are not needed.\n",
    "# NOTE(review): assumes full_name is unique in TNO_Centaurs.csv - duplicates\n",
    "# there would duplicate asteroid rows in the merge; confirm against the data.\n",
    "diams = pd.read_csv('./data/diameters/TNO_Centaurs.csv', low_memory=False)\n",
    "diams = diams.drop(columns=['number', 'name', 'type'])\n",
    "df = pd.merge(df, diams, on='full_name', how='left')\n",
    "display(df.tail())\n",
    "\n",
    "# Fill only the missing diameter values from diameter_km (existing values are\n",
    "# kept), report how many were gained, then drop the helper column.\n",
    "print(df['diameter_km'].notna().sum(), 'diameter values that can be joined')\n",
    "original = df['diameter'].notna().sum()\n",
    "df['diameter'] = df['diameter'].fillna(df['diameter_km'])\n",
    "print(df['diameter'].notna().sum() - original, 'new diameter values added')\n",
    "df = df.drop(columns=['diameter_km'])\n",
    "\n",
    "# An existing output file is never overwritten; say so instead of skipping\n",
    "# silently, so a stale file on disk is noticed when the cell is re-run.\n",
    "savename = './data/all_asteroids_wrangled.csv'\n",
    "if not os.path.isfile(savename):\n",
    "    df.to_csv(savename, index=False)\n",
    "else:\n",
    "    print(savename, 'already exists and was not overwritten')\n",
    "display(df.tail())"
900
   ]
901
  },
902
  {
903
   "cell_type": "code",
904
   "execution_count": null,
905
   "metadata": {},
906
   "outputs": [
907
    {
908
     "data": {
909
      "text/html": [
910
       "<div>\n",
911
       "<style scoped>\n",
912
       "    .dataframe tbody tr th:only-of-type {\n",
913
       "        vertical-align: middle;\n",
914
       "    }\n",
915
       "\n",
916
       "    .dataframe tbody tr th {\n",
917
       "        vertical-align: top;\n",
918
       "    }\n",
919
       "\n",
920
       "    .dataframe thead th {\n",
921
       "        text-align: right;\n",
922
       "    }\n",
923
       "</style>\n",
924
       "<table border=\"1\" class=\"dataframe\">\n",
925
       "  <thead>\n",
926
       "    <tr style=\"text-align: right;\">\n",
927
       "      <th></th>\n",
928
       "      <th>id</th>\n",
929
       "      <th>spkid</th>\n",
930
       "      <th>full_name</th>\n",
931
       "      <th>pdes</th>\n",
932
       "      <th>name</th>\n",
933
       "      <th>neo</th>\n",
934
       "      <th>pha</th>\n",
935
       "      <th>diameter</th>\n",
936
       "      <th>prefix</th>\n",
937
       "      <th>q</th>\n",
938
       "      <th>per</th>\n",
939
       "      <th>class</th>\n",
940
       "    </tr>\n",
941
       "  </thead>\n",
942
       "  <tbody>\n",
943
       "    <tr>\n",
944
       "      <th>134339</th>\n",
945
       "      <td>a0134340</td>\n",
946
       "      <td>2134340</td>\n",
947
       "      <td>134340Pluto</td>\n",
948
       "      <td>134340</td>\n",
949
       "      <td>Pluto</td>\n",
950
       "      <td>N</td>\n",
951
       "      <td>N</td>\n",
952
       "      <td>2361</td>\n",
953
       "      <td>NaN</td>\n",
954
       "      <td>29.573992</td>\n",
955
       "      <td>90487.276927</td>\n",
956
       "      <td>TNO</td>\n",
957
       "    </tr>\n",
958
       "  </tbody>\n",
959
       "</table>\n",
960
       "</div>"
961
      ],
962
      "text/plain": [
963
       "              id    spkid    full_name    pdes   name neo pha diameter prefix  \\\n",
964
       "134339  a0134340  2134340  134340Pluto  134340  Pluto   N   N     2361    NaN   \n",
965
       "\n",
966
       "                q           per class  \n",
967
       "134339  29.573992  90487.276927   TNO  "
968
      ]
969
     },
970
     "execution_count": null,
971
     "metadata": {},
972
     "output_type": "execute_result"
973
    }
974
   ],
975
   "source": [
976
    "# Sanity check: display the Pluto row to confirm it came through the merge\n",
    "df.loc[df['full_name'] == '134340Pluto']"
978
   ]
979
  },
980
  {
981
   "cell_type": "code",
982
   "execution_count": null,
983
   "metadata": {},
984
   "outputs": [],
985
   "source": []
986
  }
987
 ],
988
 "metadata": {
989
  "kernelspec": {
990
   "display_name": "Python 3",
991
   "language": "python",
992
   "name": "python3"
993
  },
994
  "language_info": {
995
   "codemirror_mode": {
996
    "name": "ipython",
997
    "version": 3
998
   },
999
   "file_extension": ".py",
1000
   "mimetype": "text/x-python",
1001
   "name": "python",
1002
   "nbconvert_exporter": "python",
1003
   "pygments_lexer": "ipython3",
1004
   "version": "3.7.3"
1005
  }
1006
 },
1007
 "nbformat": 4,
1008
 "nbformat_minor": 2
1009
}
1010

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.