atlas_of_asteroids
/
2_merge_TNO_diameter_data.ipynb
1009 строк · 32.5 Кб
1{
2"cells": [
3{
4"cell_type": "code",
5"execution_count": null,
6"metadata": {},
7"outputs": [
8{
9"name": "stdout",
10"output_type": "stream",
11"text": [
12"numpy 1.15.4\n",
13"pandas 0.23.4\n",
14"watermark 1.8.1\n",
15"ELEANOR LUTZ 2019-06-03 \n",
16"\n",
17"CPython 3.7.1\n",
18"IPython 7.2.0\n",
19"\n",
20"compiler : MSC v.1900 64 bit (AMD64)\n",
21"system : Windows\n",
22"release : 10\n",
23"machine : AMD64\n",
24"processor : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
25"CPU cores : 12\n",
26"interpreter: 64bit\n"
27]
28}
29],
30"source": [
31"# Watermark is not required for this code, but is included for information. \n",
32"import watermark\n",
33"%load_ext watermark\n",
34"%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
35]
36},
37{
38"cell_type": "code",
39"execution_count": null,
40"metadata": {},
41"outputs": [],
42"source": [
43"import numpy as np\n",
44"import pandas as pd\n",
45"import os.path"
46]
47},
48{
49"cell_type": "code",
50"execution_count": null,
51"metadata": {},
52"outputs": [
53{
54"data": {
55"text/html": [
56"<div>\n",
57"<style scoped>\n",
58" .dataframe tbody tr th:only-of-type {\n",
59" vertical-align: middle;\n",
60" }\n",
61"\n",
62" .dataframe tbody tr th {\n",
63" vertical-align: top;\n",
64" }\n",
65"\n",
66" .dataframe thead th {\n",
67" text-align: right;\n",
68" }\n",
69"</style>\n",
70"<table border=\"1\" class=\"dataframe\">\n",
71" <thead>\n",
72" <tr style=\"text-align: right;\">\n",
73" <th></th>\n",
74" <th>0</th>\n",
75" </tr>\n",
76" </thead>\n",
77" <tbody>\n",
78" <tr>\n",
79" <th>0</th>\n",
80" <td>2060 Chiron 1977 UB 13.7 0...</td>\n",
81" </tr>\n",
82" <tr>\n",
83" <th>1</th>\n",
84" <td>2060 Chiron 1977 UB 13.7 0...</td>\n",
85" </tr>\n",
86" <tr>\n",
87" <th>2</th>\n",
88" <td>2060 Chiron 1977 UB 13.7 0...</td>\n",
89" </tr>\n",
90" <tr>\n",
91" <th>3</th>\n",
92" <td>2060 Chiron 1977 UB 13.7 0...</td>\n",
93" </tr>\n",
94" <tr>\n",
95" <th>4</th>\n",
96" <td>2060 Chiron 1977 UB 13.7 0...</td>\n",
97" </tr>\n",
98" </tbody>\n",
99"</table>\n",
100"</div>"
101],
102"text/plain": [
103" 0\n",
104"0 2060 Chiron 1977 UB 13.7 0...\n",
105"1 2060 Chiron 1977 UB 13.7 0...\n",
106"2 2060 Chiron 1977 UB 13.7 0...\n",
107"3 2060 Chiron 1977 UB 13.7 0...\n",
108"4 2060 Chiron 1977 UB 13.7 0..."
109]
110},
111"metadata": {},
112"output_type": "display_data"
113},
114{
115"data": {
116"text/html": [
117"<div>\n",
118"<style scoped>\n",
119" .dataframe tbody tr th:only-of-type {\n",
120" vertical-align: middle;\n",
121" }\n",
122"\n",
123" .dataframe tbody tr th {\n",
124" vertical-align: top;\n",
125" }\n",
126"\n",
127" .dataframe thead th {\n",
128" text-align: right;\n",
129" }\n",
130"</style>\n",
131"<table border=\"1\" class=\"dataframe\">\n",
132" <thead>\n",
133" <tr style=\"text-align: right;\">\n",
134" <th></th>\n",
135" <th>number</th>\n",
136" <th>name</th>\n",
137" <th>full_name</th>\n",
138" <th>axis_AU</th>\n",
139" <th>eccentricity</th>\n",
140" <th>inclination</th>\n",
141" <th>type</th>\n",
142" <th>companions</th>\n",
143" <th>magnitude</th>\n",
144" <th>abs_magnitude</th>\n",
145" <th>...</th>\n",
146" <th>albedo_err2</th>\n",
147" <th>albedo_notecode</th>\n",
148" <th>albedo_colorcode</th>\n",
149" <th>density</th>\n",
150" <th>density_err1</th>\n",
151" <th>density_err2</th>\n",
152" <th>density_notecode</th>\n",
153" <th>methods_da</th>\n",
154" <th>methods_density</th>\n",
155" <th>reference</th>\n",
156" </tr>\n",
157" </thead>\n",
158" <tbody>\n",
159" <tr>\n",
160" <th>0</th>\n",
161" <td>2060</td>\n",
162" <td>Chiron</td>\n",
163" <td>1977UB</td>\n",
164" <td>13.7</td>\n",
165" <td>0.38</td>\n",
166" <td>7.</td>\n",
167" <td>CEN</td>\n",
168" <td>0</td>\n",
169" <td>5.8</td>\n",
170" <td>6.900</td>\n",
171" <td>...</td>\n",
172" <td>0.035</td>\n",
173" <td>*</td>\n",
174" <td>-</td>\n",
175" <td>-9.999</td>\n",
176" <td>-9.999</td>\n",
177" <td>-9.999</td>\n",
178" <td>-</td>\n",
179" <td>T</td>\n",
180" <td>-</td>\n",
181" <td>L84a</td>\n",
182" </tr>\n",
183" <tr>\n",
184" <th>1</th>\n",
185" <td>2060</td>\n",
186" <td>Chiron</td>\n",
187" <td>1977UB</td>\n",
188" <td>13.7</td>\n",
189" <td>0.38</td>\n",
190" <td>7.</td>\n",
191" <td>CEN</td>\n",
192" <td>0</td>\n",
193" <td>5.8</td>\n",
194" <td>-99.999</td>\n",
195" <td>...</td>\n",
196" <td>-9.999</td>\n",
197" <td>-</td>\n",
198" <td>G</td>\n",
199" <td>-9.999</td>\n",
200" <td>-9.999</td>\n",
201" <td>-9.999</td>\n",
202" <td>-</td>\n",
203" <td>T</td>\n",
204" <td>-</td>\n",
205" <td>S91a</td>\n",
206" </tr>\n",
207" <tr>\n",
208" <th>2</th>\n",
209" <td>2060</td>\n",
210" <td>Chiron</td>\n",
211" <td>1977UB</td>\n",
212" <td>13.7</td>\n",
213" <td>0.38</td>\n",
214" <td>7.</td>\n",
215" <td>CEN</td>\n",
216" <td>0</td>\n",
217" <td>5.8</td>\n",
218" <td>6.800</td>\n",
219" <td>...</td>\n",
220" <td>-9.999</td>\n",
221" <td>*</td>\n",
222" <td>G</td>\n",
223" <td>-9.999</td>\n",
224" <td>-9.999</td>\n",
225" <td>-9.999</td>\n",
226" <td>-</td>\n",
227" <td>T</td>\n",
228" <td>-</td>\n",
229" <td>J92a</td>\n",
230" </tr>\n",
231" <tr>\n",
232" <th>3</th>\n",
233" <td>2060</td>\n",
234" <td>Chiron</td>\n",
235" <td>1977UB</td>\n",
236" <td>13.7</td>\n",
237" <td>0.38</td>\n",
238" <td>7.</td>\n",
239" <td>CEN</td>\n",
240" <td>0</td>\n",
241" <td>5.8</td>\n",
242" <td>-99.999</td>\n",
243" <td>...</td>\n",
244" <td>-9.999</td>\n",
245" <td>-</td>\n",
246" <td>-</td>\n",
247" <td>-9.999</td>\n",
248" <td>-9.999</td>\n",
249" <td>-9.999</td>\n",
250" <td>-</td>\n",
251" <td>O</td>\n",
252" <td>-</td>\n",
253" <td>B93a</td>\n",
254" </tr>\n",
255" <tr>\n",
256" <th>4</th>\n",
257" <td>2060</td>\n",
258" <td>Chiron</td>\n",
259" <td>1977UB</td>\n",
260" <td>13.7</td>\n",
261" <td>0.38</td>\n",
262" <td>7.</td>\n",
263" <td>CEN</td>\n",
264" <td>0</td>\n",
265" <td>5.8</td>\n",
266" <td>6.900</td>\n",
267" <td>...</td>\n",
268" <td>0.030</td>\n",
269" <td>*</td>\n",
270" <td>-</td>\n",
271" <td>-9.999</td>\n",
272" <td>-9.999</td>\n",
273" <td>-9.999</td>\n",
274" <td>-</td>\n",
275" <td>T</td>\n",
276" <td>-</td>\n",
277" <td>C94a</td>\n",
278" </tr>\n",
279" </tbody>\n",
280"</table>\n",
281"<p>5 rows × 35 columns</p>\n",
282"</div>"
283],
284"text/plain": [
285" number name full_name axis_AU eccentricity inclination type companions \\\n",
286"0 2060 Chiron 1977UB 13.7 0.38 7. CEN 0 \n",
287"1 2060 Chiron 1977UB 13.7 0.38 7. CEN 0 \n",
288"2 2060 Chiron 1977UB 13.7 0.38 7. CEN 0 \n",
289"3 2060 Chiron 1977UB 13.7 0.38 7. CEN 0 \n",
290"4 2060 Chiron 1977UB 13.7 0.38 7. CEN 0 \n",
291"\n",
292" magnitude abs_magnitude ... albedo_err2 albedo_notecode \\\n",
293"0 5.8 6.900 ... 0.035 * \n",
294"1 5.8 -99.999 ... -9.999 - \n",
295"2 5.8 6.800 ... -9.999 * \n",
296"3 5.8 -99.999 ... -9.999 - \n",
297"4 5.8 6.900 ... 0.030 * \n",
298"\n",
299" albedo_colorcode density density_err1 density_err2 density_notecode \\\n",
300"0 - -9.999 -9.999 -9.999 - \n",
301"1 G -9.999 -9.999 -9.999 - \n",
302"2 G -9.999 -9.999 -9.999 - \n",
303"3 - -9.999 -9.999 -9.999 - \n",
304"4 - -9.999 -9.999 -9.999 - \n",
305"\n",
306" methods_da methods_density reference \n",
307"0 T - L84a \n",
308"1 T - S91a \n",
309"2 T - J92a \n",
310"3 O - B93a \n",
311"4 T - C94a \n",
312"\n",
313"[5 rows x 35 columns]"
314]
315},
316"metadata": {},
317"output_type": "display_data"
318},
319{
320"data": {
321"text/html": [
322"<div>\n",
323"<style scoped>\n",
324" .dataframe tbody tr th:only-of-type {\n",
325" vertical-align: middle;\n",
326" }\n",
327"\n",
328" .dataframe tbody tr th {\n",
329" vertical-align: top;\n",
330" }\n",
331"\n",
332" .dataframe thead th {\n",
333" text-align: right;\n",
334" }\n",
335"</style>\n",
336"<table border=\"1\" class=\"dataframe\">\n",
337" <thead>\n",
338" <tr style=\"text-align: right;\">\n",
339" <th></th>\n",
340" <th>number</th>\n",
341" <th>name</th>\n",
342" <th>full_name</th>\n",
343" <th>type</th>\n",
344" <th>diameter_km</th>\n",
345" </tr>\n",
346" </thead>\n",
347" <tbody>\n",
348" <tr>\n",
349" <th>0</th>\n",
350" <td>0</td>\n",
351" <td></td>\n",
352" <td>1996TS66</td>\n",
353" <td>CUB</td>\n",
354" <td>163.00</td>\n",
355" </tr>\n",
356" <tr>\n",
357" <th>1</th>\n",
358" <td>0</td>\n",
359" <td></td>\n",
360" <td>1998WW31</td>\n",
361" <td>CUB</td>\n",
362" <td>170.30</td>\n",
363" </tr>\n",
364" <tr>\n",
365" <th>2</th>\n",
366" <td>0</td>\n",
367" <td></td>\n",
368" <td>1999OJ4</td>\n",
369" <td>CUB</td>\n",
370" <td>103.95</td>\n",
371" </tr>\n",
372" <tr>\n",
373" <th>3</th>\n",
374" <td>0</td>\n",
375" <td></td>\n",
376" <td>2000CF105</td>\n",
377" <td>CUB</td>\n",
378" <td>78.30</td>\n",
379" </tr>\n",
380" <tr>\n",
381" <th>4</th>\n",
382" <td>0</td>\n",
383" <td></td>\n",
384" <td>2000GM137</td>\n",
385" <td>CEN</td>\n",
386" <td>8.80</td>\n",
387" </tr>\n",
388" </tbody>\n",
389"</table>\n",
390"</div>"
391],
392"text/plain": [
393" number name full_name type diameter_km\n",
394"0 0 1996TS66 CUB 163.00\n",
395"1 0 1998WW31 CUB 170.30\n",
396"2 0 1999OJ4 CUB 103.95\n",
397"3 0 2000CF105 CUB 78.30\n",
398"4 0 2000GM137 CEN 8.80"
399]
400},
401"metadata": {},
402"output_type": "display_data"
403},
404{
405"name": "stdout",
406"output_type": "stream",
407"text": [
408"179 unique asteroids\n"
409]
410}
411],
412"source": [
413"# Manually assign column names from bundle_description.txt file\n",
414"colnames = ['number', 'name', 'full_name', 'axis_AU', 'eccentricity', 'inclination',\n",
415" 'type', 'companions', 'magnitude', 'abs_magnitude', 'abs_magnitude_err', \n",
416" 'diameter_km', 'e_diameter_err1', 'e_diameter_err2', 'e_diameter_notecode', \n",
417" 'p_diameter', 'p_diameter_err1', 'p_diameter_err2', 'p_diameter_notecode',\n",
418" 'c_diameter', 'c_diameter_err1', 'c_diameter_err2', 'c_diameter_notecode',\n",
419" 'albedo', 'albedo_err1', 'albedo_err2', 'albedo_notecode', 'albedo_colorcode', \n",
420" 'density', 'density_err1', 'density_err2', 'density_notecode', \n",
421" 'methods_da', 'methods_density', 'reference'\n",
422" ]\n",
423"\n",
424"diams = pd.read_csv('./data/diameters/tno-centaur_diam-albedo-density/data/tno_centaur_diam_alb_dens.tab', \n",
425" header=None, sep='\\t', encoding='latin-1')\n",
426"\n",
427"display(diams.head())\n",
428"diams = diams[0].str.split(expand=True)\n",
429"diams[2] = diams[2] + diams[3]\n",
430"diams.drop([3], inplace=True, axis=1)\n",
431"\n",
432"diams.columns = colnames\n",
433"display(diams.head())\n",
434"\n",
435"diams = diams[['number', 'name', 'full_name', 'type', 'diameter_km', 'p_diameter']]\n",
436"diams['diameter_km'] = diams['diameter_km'].astype(float)\n",
437"diams['p_diameter'] = diams['p_diameter'].astype(float)\n",
438"diams.loc[~(diams['diameter_km'] > 0), 'diameter_km']=np.nan\n",
439"diams.loc[~(diams['p_diameter'] > 0), 'p_diameter']=np.nan\n",
440"diams['diameter_km'] = diams['diameter_km'].fillna(diams['p_diameter'])\n",
441"diams = diams[~pd.isnull(diams['diameter_km'])]\n",
442"\n",
443"diams.set_index(['number', 'name', 'full_name', 'type'], inplace=True)\n",
444"diams = diams.groupby(['number', 'name', 'full_name', 'type']).median()\n",
445"diams = diams.reset_index()\n",
446"diams['name'] = diams['name'].str.replace('-', '')\n",
447"diams.loc[diams['name'] == 'Pluto', 'full_name'] = '134340Pluto'\n",
448"diams.drop(['p_diameter'], axis=1, inplace=True)\n",
449"\n",
450"display(diams.head())\n",
451"savename = './data/diameters/TNO_Centaurs.csv'\n",
452"if not os.path.isfile(savename):\n",
453" diams.to_csv(savename, index=False)\n",
454"\n",
455"print(len(diams), 'unique asteroids')"
456]
457},
458{
459"cell_type": "code",
460"execution_count": null,
461"metadata": {},
462"outputs": [
463{
464"data": {
465"text/html": [
466"<div>\n",
467"<style scoped>\n",
468" .dataframe tbody tr th:only-of-type {\n",
469" vertical-align: middle;\n",
470" }\n",
471"\n",
472" .dataframe tbody tr th {\n",
473" vertical-align: top;\n",
474" }\n",
475"\n",
476" .dataframe thead th {\n",
477" text-align: right;\n",
478" }\n",
479"</style>\n",
480"<table border=\"1\" class=\"dataframe\">\n",
481" <thead>\n",
482" <tr style=\"text-align: right;\">\n",
483" <th></th>\n",
484" <th>id</th>\n",
485" <th>spkid</th>\n",
486" <th>full_name</th>\n",
487" <th>pdes</th>\n",
488" <th>name</th>\n",
489" <th>neo</th>\n",
490" <th>pha</th>\n",
491" <th>diameter</th>\n",
492" <th>prefix</th>\n",
493" <th>q</th>\n",
494" <th>per</th>\n",
495" <th>class</th>\n",
496" </tr>\n",
497" </thead>\n",
498" <tbody>\n",
499" <tr>\n",
500" <th>794557</th>\n",
501" <td>bT3S3411</td>\n",
502" <td>3246664</td>\n",
503" <td>3411T3</td>\n",
504" <td>3411 T-3</td>\n",
505" <td>NaN</td>\n",
506" <td>N</td>\n",
507" <td>N</td>\n",
508" <td>NaN</td>\n",
509" <td>NaN</td>\n",
510" <td>2.073221</td>\n",
511" <td>1428.185418</td>\n",
512" <td>MBA</td>\n",
513" </tr>\n",
514" <tr>\n",
515" <th>794558</th>\n",
516" <td>bT3S3521</td>\n",
517" <td>3246672</td>\n",
518" <td>3521T3</td>\n",
519" <td>3521 T-3</td>\n",
520" <td>NaN</td>\n",
521" <td>N</td>\n",
522" <td>N</td>\n",
523" <td>NaN</td>\n",
524" <td>NaN</td>\n",
525" <td>1.546423</td>\n",
526" <td>1386.029296</td>\n",
527" <td>MCA</td>\n",
528" </tr>\n",
529" <tr>\n",
530" <th>794559</th>\n",
531" <td>bT3S4571</td>\n",
532" <td>3248621</td>\n",
533" <td>4571T3</td>\n",
534" <td>4571 T-3</td>\n",
535" <td>NaN</td>\n",
536" <td>N</td>\n",
537" <td>N</td>\n",
538" <td>NaN</td>\n",
539" <td>NaN</td>\n",
540" <td>1.813901</td>\n",
541" <td>1484.222588</td>\n",
542" <td>MBA</td>\n",
543" </tr>\n",
544" <tr>\n",
545" <th>794560</th>\n",
546" <td>bT3S4658</td>\n",
547" <td>3248624</td>\n",
548" <td>4658T3</td>\n",
549" <td>4658 T-3</td>\n",
550" <td>NaN</td>\n",
551" <td>N</td>\n",
552" <td>N</td>\n",
553" <td>NaN</td>\n",
554" <td>NaN</td>\n",
555" <td>1.718022</td>\n",
556" <td>1138.438109</td>\n",
557" <td>MBA</td>\n",
558" </tr>\n",
559" <tr>\n",
560" <th>794561</th>\n",
561" <td>bT3S5154</td>\n",
562" <td>3248651</td>\n",
563" <td>5154T3</td>\n",
564" <td>5154 T-3</td>\n",
565" <td>NaN</td>\n",
566" <td>N</td>\n",
567" <td>N</td>\n",
568" <td>NaN</td>\n",
569" <td>NaN</td>\n",
570" <td>2.239464</td>\n",
571" <td>1985.374056</td>\n",
572" <td>MBA</td>\n",
573" </tr>\n",
574" </tbody>\n",
575"</table>\n",
576"</div>"
577],
578"text/plain": [
579" id spkid full_name pdes name neo pha diameter prefix \\\n",
580"794557 bT3S3411 3246664 3411T3 3411 T-3 NaN N N NaN NaN \n",
581"794558 bT3S3521 3246672 3521T3 3521 T-3 NaN N N NaN NaN \n",
582"794559 bT3S4571 3248621 4571T3 4571 T-3 NaN N N NaN NaN \n",
583"794560 bT3S4658 3248624 4658T3 4658 T-3 NaN N N NaN NaN \n",
584"794561 bT3S5154 3248651 5154T3 5154 T-3 NaN N N NaN NaN \n",
585"\n",
586" q per class \n",
587"794557 2.073221 1428.185418 MBA \n",
588"794558 1.546423 1386.029296 MCA \n",
589"794559 1.813901 1484.222588 MBA \n",
590"794560 1.718022 1138.438109 MBA \n",
591"794561 2.239464 1985.374056 MBA "
592]
593},
594"metadata": {},
595"output_type": "display_data"
596},
597{
598"data": {
599"text/html": [
600"<div>\n",
601"<style scoped>\n",
602" .dataframe tbody tr th:only-of-type {\n",
603" vertical-align: middle;\n",
604" }\n",
605"\n",
606" .dataframe tbody tr th {\n",
607" vertical-align: top;\n",
608" }\n",
609"\n",
610" .dataframe thead th {\n",
611" text-align: right;\n",
612" }\n",
613"</style>\n",
614"<table border=\"1\" class=\"dataframe\">\n",
615" <thead>\n",
616" <tr style=\"text-align: right;\">\n",
617" <th></th>\n",
618" <th>id</th>\n",
619" <th>spkid</th>\n",
620" <th>full_name</th>\n",
621" <th>pdes</th>\n",
622" <th>name</th>\n",
623" <th>neo</th>\n",
624" <th>pha</th>\n",
625" <th>diameter</th>\n",
626" <th>prefix</th>\n",
627" <th>q</th>\n",
628" <th>per</th>\n",
629" <th>class</th>\n",
630" <th>diameter_km</th>\n",
631" </tr>\n",
632" </thead>\n",
633" <tbody>\n",
634" <tr>\n",
635" <th>794557</th>\n",
636" <td>bT3S3411</td>\n",
637" <td>3246664</td>\n",
638" <td>3411T3</td>\n",
639" <td>3411 T-3</td>\n",
640" <td>NaN</td>\n",
641" <td>N</td>\n",
642" <td>N</td>\n",
643" <td>NaN</td>\n",
644" <td>NaN</td>\n",
645" <td>2.073221</td>\n",
646" <td>1428.185418</td>\n",
647" <td>MBA</td>\n",
648" <td>NaN</td>\n",
649" </tr>\n",
650" <tr>\n",
651" <th>794558</th>\n",
652" <td>bT3S3521</td>\n",
653" <td>3246672</td>\n",
654" <td>3521T3</td>\n",
655" <td>3521 T-3</td>\n",
656" <td>NaN</td>\n",
657" <td>N</td>\n",
658" <td>N</td>\n",
659" <td>NaN</td>\n",
660" <td>NaN</td>\n",
661" <td>1.546423</td>\n",
662" <td>1386.029296</td>\n",
663" <td>MCA</td>\n",
664" <td>NaN</td>\n",
665" </tr>\n",
666" <tr>\n",
667" <th>794559</th>\n",
668" <td>bT3S4571</td>\n",
669" <td>3248621</td>\n",
670" <td>4571T3</td>\n",
671" <td>4571 T-3</td>\n",
672" <td>NaN</td>\n",
673" <td>N</td>\n",
674" <td>N</td>\n",
675" <td>NaN</td>\n",
676" <td>NaN</td>\n",
677" <td>1.813901</td>\n",
678" <td>1484.222588</td>\n",
679" <td>MBA</td>\n",
680" <td>NaN</td>\n",
681" </tr>\n",
682" <tr>\n",
683" <th>794560</th>\n",
684" <td>bT3S4658</td>\n",
685" <td>3248624</td>\n",
686" <td>4658T3</td>\n",
687" <td>4658 T-3</td>\n",
688" <td>NaN</td>\n",
689" <td>N</td>\n",
690" <td>N</td>\n",
691" <td>NaN</td>\n",
692" <td>NaN</td>\n",
693" <td>1.718022</td>\n",
694" <td>1138.438109</td>\n",
695" <td>MBA</td>\n",
696" <td>NaN</td>\n",
697" </tr>\n",
698" <tr>\n",
699" <th>794561</th>\n",
700" <td>bT3S5154</td>\n",
701" <td>3248651</td>\n",
702" <td>5154T3</td>\n",
703" <td>5154 T-3</td>\n",
704" <td>NaN</td>\n",
705" <td>N</td>\n",
706" <td>N</td>\n",
707" <td>NaN</td>\n",
708" <td>NaN</td>\n",
709" <td>2.239464</td>\n",
710" <td>1985.374056</td>\n",
711" <td>MBA</td>\n",
712" <td>NaN</td>\n",
713" </tr>\n",
714" </tbody>\n",
715"</table>\n",
716"</div>"
717],
718"text/plain": [
719" id spkid full_name pdes name neo pha diameter prefix \\\n",
720"794557 bT3S3411 3246664 3411T3 3411 T-3 NaN N N NaN NaN \n",
721"794558 bT3S3521 3246672 3521T3 3521 T-3 NaN N N NaN NaN \n",
722"794559 bT3S4571 3248621 4571T3 4571 T-3 NaN N N NaN NaN \n",
723"794560 bT3S4658 3248624 4658T3 4658 T-3 NaN N N NaN NaN \n",
724"794561 bT3S5154 3248651 5154T3 5154 T-3 NaN N N NaN NaN \n",
725"\n",
726" q per class diameter_km \n",
727"794557 2.073221 1428.185418 MBA NaN \n",
728"794558 1.546423 1386.029296 MCA NaN \n",
729"794559 1.813901 1484.222588 MBA NaN \n",
730"794560 1.718022 1138.438109 MBA NaN \n",
731"794561 2.239464 1985.374056 MBA NaN "
732]
733},
734"metadata": {},
735"output_type": "display_data"
736},
737{
738"name": "stdout",
739"output_type": "stream",
740"text": [
741"44 diameter values that can be joined\n",
742"35 new diameter values added\n"
743]
744},
745{
746"data": {
747"text/html": [
748"<div>\n",
749"<style scoped>\n",
750" .dataframe tbody tr th:only-of-type {\n",
751" vertical-align: middle;\n",
752" }\n",
753"\n",
754" .dataframe tbody tr th {\n",
755" vertical-align: top;\n",
756" }\n",
757"\n",
758" .dataframe thead th {\n",
759" text-align: right;\n",
760" }\n",
761"</style>\n",
762"<table border=\"1\" class=\"dataframe\">\n",
763" <thead>\n",
764" <tr style=\"text-align: right;\">\n",
765" <th></th>\n",
766" <th>id</th>\n",
767" <th>spkid</th>\n",
768" <th>full_name</th>\n",
769" <th>pdes</th>\n",
770" <th>name</th>\n",
771" <th>neo</th>\n",
772" <th>pha</th>\n",
773" <th>diameter</th>\n",
774" <th>prefix</th>\n",
775" <th>q</th>\n",
776" <th>per</th>\n",
777" <th>class</th>\n",
778" </tr>\n",
779" </thead>\n",
780" <tbody>\n",
781" <tr>\n",
782" <th>794557</th>\n",
783" <td>bT3S3411</td>\n",
784" <td>3246664</td>\n",
785" <td>3411T3</td>\n",
786" <td>3411 T-3</td>\n",
787" <td>NaN</td>\n",
788" <td>N</td>\n",
789" <td>N</td>\n",
790" <td>NaN</td>\n",
791" <td>NaN</td>\n",
792" <td>2.073221</td>\n",
793" <td>1428.185418</td>\n",
794" <td>MBA</td>\n",
795" </tr>\n",
796" <tr>\n",
797" <th>794558</th>\n",
798" <td>bT3S3521</td>\n",
799" <td>3246672</td>\n",
800" <td>3521T3</td>\n",
801" <td>3521 T-3</td>\n",
802" <td>NaN</td>\n",
803" <td>N</td>\n",
804" <td>N</td>\n",
805" <td>NaN</td>\n",
806" <td>NaN</td>\n",
807" <td>1.546423</td>\n",
808" <td>1386.029296</td>\n",
809" <td>MCA</td>\n",
810" </tr>\n",
811" <tr>\n",
812" <th>794559</th>\n",
813" <td>bT3S4571</td>\n",
814" <td>3248621</td>\n",
815" <td>4571T3</td>\n",
816" <td>4571 T-3</td>\n",
817" <td>NaN</td>\n",
818" <td>N</td>\n",
819" <td>N</td>\n",
820" <td>NaN</td>\n",
821" <td>NaN</td>\n",
822" <td>1.813901</td>\n",
823" <td>1484.222588</td>\n",
824" <td>MBA</td>\n",
825" </tr>\n",
826" <tr>\n",
827" <th>794560</th>\n",
828" <td>bT3S4658</td>\n",
829" <td>3248624</td>\n",
830" <td>4658T3</td>\n",
831" <td>4658 T-3</td>\n",
832" <td>NaN</td>\n",
833" <td>N</td>\n",
834" <td>N</td>\n",
835" <td>NaN</td>\n",
836" <td>NaN</td>\n",
837" <td>1.718022</td>\n",
838" <td>1138.438109</td>\n",
839" <td>MBA</td>\n",
840" </tr>\n",
841" <tr>\n",
842" <th>794561</th>\n",
843" <td>bT3S5154</td>\n",
844" <td>3248651</td>\n",
845" <td>5154T3</td>\n",
846" <td>5154 T-3</td>\n",
847" <td>NaN</td>\n",
848" <td>N</td>\n",
849" <td>N</td>\n",
850" <td>NaN</td>\n",
851" <td>NaN</td>\n",
852" <td>2.239464</td>\n",
853" <td>1985.374056</td>\n",
854" <td>MBA</td>\n",
855" </tr>\n",
856" </tbody>\n",
857"</table>\n",
858"</div>"
859],
860"text/plain": [
861" id spkid full_name pdes name neo pha diameter prefix \\\n",
862"794557 bT3S3411 3246664 3411T3 3411 T-3 NaN N N NaN NaN \n",
863"794558 bT3S3521 3246672 3521T3 3521 T-3 NaN N N NaN NaN \n",
864"794559 bT3S4571 3248621 4571T3 4571 T-3 NaN N N NaN NaN \n",
865"794560 bT3S4658 3248624 4658T3 4658 T-3 NaN N N NaN NaN \n",
866"794561 bT3S5154 3248651 5154T3 5154 T-3 NaN N N NaN NaN \n",
867"\n",
868" q per class \n",
869"794557 2.073221 1428.185418 MBA \n",
870"794558 1.546423 1386.029296 MCA \n",
871"794559 1.813901 1484.222588 MBA \n",
872"794560 1.718022 1138.438109 MBA \n",
873"794561 2.239464 1985.374056 MBA "
874]
875},
876"metadata": {},
877"output_type": "display_data"
878}
879],
880"source": [
881"df = pd.read_csv('./data/all_asteroids.csv', low_memory=False)\n",
882"df['full_name'] = df['full_name'].str.replace(' ', '').str.replace('(', '').str.replace(')', '').str.replace('-', '')\n",
883"display(df.tail())\n",
884"\n",
885"diams = pd.read_csv('./data/diameters/TNO_Centaurs.csv', low_memory=False)\n",
886"diams.drop(['number', 'name', 'type'], axis=1, inplace=True)\n",
887"df = pd.merge(df, diams, on='full_name', how='left')\n",
888"display(df.tail())\n",
889"\n",
890"print(len(df[~pd.isnull(df['diameter_km'])]), 'diameter values that can be joined')\n",
891"original = len(df[~pd.isnull(df['diameter'])])\n",
892"df['diameter'] = df['diameter'].fillna(df['diameter_km'])\n",
893"print(len(df[~pd.isnull(df['diameter'])])-original, 'new diameter values added')\n",
894"df.drop(['diameter_km'], inplace=True, axis=1)\n",
895"\n",
896"savename = './data/all_asteroids_wrangled.csv'\n",
897"if not os.path.isfile(savename):\n",
898" df.to_csv(savename, index=False)\n",
899"display(df.tail())"
900]
901},
902{
903"cell_type": "code",
904"execution_count": null,
905"metadata": {},
906"outputs": [
907{
908"data": {
909"text/html": [
910"<div>\n",
911"<style scoped>\n",
912" .dataframe tbody tr th:only-of-type {\n",
913" vertical-align: middle;\n",
914" }\n",
915"\n",
916" .dataframe tbody tr th {\n",
917" vertical-align: top;\n",
918" }\n",
919"\n",
920" .dataframe thead th {\n",
921" text-align: right;\n",
922" }\n",
923"</style>\n",
924"<table border=\"1\" class=\"dataframe\">\n",
925" <thead>\n",
926" <tr style=\"text-align: right;\">\n",
927" <th></th>\n",
928" <th>id</th>\n",
929" <th>spkid</th>\n",
930" <th>full_name</th>\n",
931" <th>pdes</th>\n",
932" <th>name</th>\n",
933" <th>neo</th>\n",
934" <th>pha</th>\n",
935" <th>diameter</th>\n",
936" <th>prefix</th>\n",
937" <th>q</th>\n",
938" <th>per</th>\n",
939" <th>class</th>\n",
940" </tr>\n",
941" </thead>\n",
942" <tbody>\n",
943" <tr>\n",
944" <th>134339</th>\n",
945" <td>a0134340</td>\n",
946" <td>2134340</td>\n",
947" <td>134340Pluto</td>\n",
948" <td>134340</td>\n",
949" <td>Pluto</td>\n",
950" <td>N</td>\n",
951" <td>N</td>\n",
952" <td>2361</td>\n",
953" <td>NaN</td>\n",
954" <td>29.573992</td>\n",
955" <td>90487.276927</td>\n",
956" <td>TNO</td>\n",
957" </tr>\n",
958" </tbody>\n",
959"</table>\n",
960"</div>"
961],
962"text/plain": [
963" id spkid full_name pdes name neo pha diameter prefix \\\n",
964"134339 a0134340 2134340 134340Pluto 134340 Pluto N N 2361 NaN \n",
965"\n",
966" q per class \n",
967"134339 29.573992 90487.276927 TNO "
968]
969},
970"execution_count": null,
971"metadata": {},
972"output_type": "execute_result"
973}
974],
975"source": [
976"# Check that Pluto has been added appropriately\n",
977"df[df['full_name'] == '134340Pluto']"
978]
979},
980{
981"cell_type": "code",
982"execution_count": null,
983"metadata": {},
984"outputs": [],
985"source": []
986}
987],
988"metadata": {
989"kernelspec": {
990"display_name": "Python 3",
991"language": "python",
992"name": "python3"
993},
994"language_info": {
995"codemirror_mode": {
996"name": "ipython",
997"version": 3
998},
999"file_extension": ".py",
1000"mimetype": "text/x-python",
1001"name": "python",
1002"nbconvert_exporter": "python",
1003"pygments_lexer": "ipython3",
1004"version": "3.7.3"
1005}
1006},
1007"nbformat": 4,
1008"nbformat_minor": 2
1009}
1010