atlas_of_worldstars
/
1_process_starbase_data.ipynb
501 строка · 15.3 Кб
1{
2"cells": [
3{
4"cell_type": "code",
5"execution_count": 1,
6"metadata": {},
7"outputs": [],
8"source": [
9"import pandas as pd"
10]
11},
12{
13"cell_type": "code",
14"execution_count": 2,
15"metadata": {},
16"outputs": [
17{
18"name": "stdout",
19"output_type": "stream",
20"text": [
21"watermark 1.8.1\n",
22"pandas 0.23.4\n",
23"ELEANOR LUTZ 2019-07-26 \n",
24"\n",
25"CPython 3.7.1\n",
26"IPython 7.2.0\n",
27"\n",
28"compiler : MSC v.1900 64 bit (AMD64)\n",
29"system : Windows\n",
30"release : 10\n",
31"machine : AMD64\n",
32"processor : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
33"CPU cores : 12\n",
34"interpreter: 64bit\n"
35]
36}
37],
38"source": [
39"# Watermark is not required for this code, but is included for information. \n",
40"import watermark\n",
41"%load_ext watermark\n",
42"%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
43]
44},
45{
46"cell_type": "code",
47"execution_count": 3,
48"metadata": {},
49"outputs": [
50{
51"data": {
52"text/html": [
53"<div>\n",
54"<style scoped>\n",
55" .dataframe tbody tr th:only-of-type {\n",
56" vertical-align: middle;\n",
57" }\n",
58"\n",
59" .dataframe tbody tr th {\n",
60" vertical-align: top;\n",
61" }\n",
62"\n",
63" .dataframe thead th {\n",
64" text-align: right;\n",
65" }\n",
66"</style>\n",
67"<table border=\"1\" class=\"dataframe\">\n",
68" <thead>\n",
69" <tr style=\"text-align: right;\">\n",
70" <th></th>\n",
71" <th>id</th>\n",
72" <th>hip</th>\n",
73" <th>hd</th>\n",
74" <th>hr</th>\n",
75" <th>gl</th>\n",
76" <th>bf</th>\n",
77" <th>proper</th>\n",
78" <th>ra</th>\n",
79" <th>dec</th>\n",
80" <th>dist</th>\n",
81" <th>...</th>\n",
82" <th>bayer</th>\n",
83" <th>flam</th>\n",
84" <th>con</th>\n",
85" <th>comp</th>\n",
86" <th>comp_primary</th>\n",
87" <th>base</th>\n",
88" <th>lum</th>\n",
89" <th>var</th>\n",
90" <th>var_min</th>\n",
91" <th>var_max</th>\n",
92" </tr>\n",
93" </thead>\n",
94" <tbody>\n",
95" <tr>\n",
96" <th>0</th>\n",
97" <td>0</td>\n",
98" <td>NaN</td>\n",
99" <td>NaN</td>\n",
100" <td>NaN</td>\n",
101" <td>NaN</td>\n",
102" <td>NaN</td>\n",
103" <td>Sol</td>\n",
104" <td>0.000000</td>\n",
105" <td>0.000000</td>\n",
106" <td>0.0000</td>\n",
107" <td>...</td>\n",
108" <td>NaN</td>\n",
109" <td>NaN</td>\n",
110" <td>NaN</td>\n",
111" <td>1</td>\n",
112" <td>0</td>\n",
113" <td>NaN</td>\n",
114" <td>1.000000</td>\n",
115" <td>NaN</td>\n",
116" <td>NaN</td>\n",
117" <td>NaN</td>\n",
118" </tr>\n",
119" <tr>\n",
120" <th>1</th>\n",
121" <td>1</td>\n",
122" <td>1.0</td>\n",
123" <td>224700.0</td>\n",
124" <td>NaN</td>\n",
125" <td>NaN</td>\n",
126" <td>NaN</td>\n",
127" <td>NaN</td>\n",
128" <td>0.000060</td>\n",
129" <td>1.089009</td>\n",
130" <td>219.7802</td>\n",
131" <td>...</td>\n",
132" <td>NaN</td>\n",
133" <td>NaN</td>\n",
134" <td>Psc</td>\n",
135" <td>1</td>\n",
136" <td>1</td>\n",
137" <td>NaN</td>\n",
138" <td>9.638290</td>\n",
139" <td>NaN</td>\n",
140" <td>NaN</td>\n",
141" <td>NaN</td>\n",
142" </tr>\n",
143" <tr>\n",
144" <th>2</th>\n",
145" <td>2</td>\n",
146" <td>2.0</td>\n",
147" <td>224690.0</td>\n",
148" <td>NaN</td>\n",
149" <td>NaN</td>\n",
150" <td>NaN</td>\n",
151" <td>NaN</td>\n",
152" <td>0.000283</td>\n",
153" <td>-19.498840</td>\n",
154" <td>47.9616</td>\n",
155" <td>...</td>\n",
156" <td>NaN</td>\n",
157" <td>NaN</td>\n",
158" <td>Cet</td>\n",
159" <td>1</td>\n",
160" <td>2</td>\n",
161" <td>NaN</td>\n",
162" <td>0.392283</td>\n",
163" <td>NaN</td>\n",
164" <td>NaN</td>\n",
165" <td>NaN</td>\n",
166" </tr>\n",
167" <tr>\n",
168" <th>3</th>\n",
169" <td>3</td>\n",
170" <td>3.0</td>\n",
171" <td>224699.0</td>\n",
172" <td>NaN</td>\n",
173" <td>NaN</td>\n",
174" <td>NaN</td>\n",
175" <td>NaN</td>\n",
176" <td>0.000335</td>\n",
177" <td>38.859279</td>\n",
178" <td>442.4779</td>\n",
179" <td>...</td>\n",
180" <td>NaN</td>\n",
181" <td>NaN</td>\n",
182" <td>And</td>\n",
183" <td>1</td>\n",
184" <td>3</td>\n",
185" <td>NaN</td>\n",
186" <td>386.901132</td>\n",
187" <td>NaN</td>\n",
188" <td>NaN</td>\n",
189" <td>NaN</td>\n",
190" </tr>\n",
191" <tr>\n",
192" <th>4</th>\n",
193" <td>4</td>\n",
194" <td>4.0</td>\n",
195" <td>224707.0</td>\n",
196" <td>NaN</td>\n",
197" <td>NaN</td>\n",
198" <td>NaN</td>\n",
199" <td>NaN</td>\n",
200" <td>0.000569</td>\n",
201" <td>-51.893546</td>\n",
202" <td>134.2282</td>\n",
203" <td>...</td>\n",
204" <td>NaN</td>\n",
205" <td>NaN</td>\n",
206" <td>Phe</td>\n",
207" <td>1</td>\n",
208" <td>4</td>\n",
209" <td>NaN</td>\n",
210" <td>9.366989</td>\n",
211" <td>NaN</td>\n",
212" <td>NaN</td>\n",
213" <td>NaN</td>\n",
214" </tr>\n",
215" </tbody>\n",
216"</table>\n",
217"<p>5 rows × 37 columns</p>\n",
218"</div>"
219],
220"text/plain": [
221" id hip hd hr gl bf proper ra dec dist \\\n",
222"0 0 NaN NaN NaN NaN NaN Sol 0.000000 0.000000 0.0000 \n",
223"1 1 1.0 224700.0 NaN NaN NaN NaN 0.000060 1.089009 219.7802 \n",
224"2 2 2.0 224690.0 NaN NaN NaN NaN 0.000283 -19.498840 47.9616 \n",
225"3 3 3.0 224699.0 NaN NaN NaN NaN 0.000335 38.859279 442.4779 \n",
226"4 4 4.0 224707.0 NaN NaN NaN NaN 0.000569 -51.893546 134.2282 \n",
227"\n",
228" ... bayer flam con comp comp_primary base lum var \\\n",
229"0 ... NaN NaN NaN 1 0 NaN 1.000000 NaN \n",
230"1 ... NaN NaN Psc 1 1 NaN 9.638290 NaN \n",
231"2 ... NaN NaN Cet 1 2 NaN 0.392283 NaN \n",
232"3 ... NaN NaN And 1 3 NaN 386.901132 NaN \n",
233"4 ... NaN NaN Phe 1 4 NaN 9.366989 NaN \n",
234"\n",
235" var_min var_max \n",
236"0 NaN NaN \n",
237"1 NaN NaN \n",
238"2 NaN NaN \n",
239"3 NaN NaN \n",
240"4 NaN NaN \n",
241"\n",
242"[5 rows x 37 columns]"
243]
244},
245"metadata": {},
246"output_type": "display_data"
247},
248{
249"data": {
250"text/html": [
251"<div>\n",
252"<style scoped>\n",
253" .dataframe tbody tr th:only-of-type {\n",
254" vertical-align: middle;\n",
255" }\n",
256"\n",
257" .dataframe tbody tr th {\n",
258" vertical-align: top;\n",
259" }\n",
260"\n",
261" .dataframe thead th {\n",
262" text-align: right;\n",
263" }\n",
264"</style>\n",
265"<table border=\"1\" class=\"dataframe\">\n",
266" <thead>\n",
267" <tr style=\"text-align: right;\">\n",
268" <th></th>\n",
269" <th>id</th>\n",
270" <th>hip</th>\n",
271" <th>hd</th>\n",
272" <th>hr</th>\n",
273" <th>gl</th>\n",
274" <th>bf</th>\n",
275" <th>proper</th>\n",
276" <th>ra</th>\n",
277" <th>dec</th>\n",
278" <th>dist</th>\n",
279" <th>...</th>\n",
280" <th>bayer</th>\n",
281" <th>flam</th>\n",
282" <th>con</th>\n",
283" <th>comp</th>\n",
284" <th>comp_primary</th>\n",
285" <th>base</th>\n",
286" <th>lum</th>\n",
287" <th>var</th>\n",
288" <th>var_min</th>\n",
289" <th>var_max</th>\n",
290" </tr>\n",
291" </thead>\n",
292" <tbody>\n",
293" <tr>\n",
294" <th>1</th>\n",
295" <td>1</td>\n",
296" <td>1.0</td>\n",
297" <td>224700.0</td>\n",
298" <td>NaN</td>\n",
299" <td>NaN</td>\n",
300" <td>NaN</td>\n",
301" <td>NaN</td>\n",
302" <td>0.000060</td>\n",
303" <td>1.089009</td>\n",
304" <td>219.7802</td>\n",
305" <td>...</td>\n",
306" <td>NaN</td>\n",
307" <td>NaN</td>\n",
308" <td>Psc</td>\n",
309" <td>1</td>\n",
310" <td>1</td>\n",
311" <td>NaN</td>\n",
312" <td>9.638290</td>\n",
313" <td>NaN</td>\n",
314" <td>NaN</td>\n",
315" <td>NaN</td>\n",
316" </tr>\n",
317" <tr>\n",
318" <th>2</th>\n",
319" <td>2</td>\n",
320" <td>2.0</td>\n",
321" <td>224690.0</td>\n",
322" <td>NaN</td>\n",
323" <td>NaN</td>\n",
324" <td>NaN</td>\n",
325" <td>NaN</td>\n",
326" <td>0.000283</td>\n",
327" <td>-19.498840</td>\n",
328" <td>47.9616</td>\n",
329" <td>...</td>\n",
330" <td>NaN</td>\n",
331" <td>NaN</td>\n",
332" <td>Cet</td>\n",
333" <td>1</td>\n",
334" <td>2</td>\n",
335" <td>NaN</td>\n",
336" <td>0.392283</td>\n",
337" <td>NaN</td>\n",
338" <td>NaN</td>\n",
339" <td>NaN</td>\n",
340" </tr>\n",
341" <tr>\n",
342" <th>3</th>\n",
343" <td>3</td>\n",
344" <td>3.0</td>\n",
345" <td>224699.0</td>\n",
346" <td>NaN</td>\n",
347" <td>NaN</td>\n",
348" <td>NaN</td>\n",
349" <td>NaN</td>\n",
350" <td>0.000335</td>\n",
351" <td>38.859279</td>\n",
352" <td>442.4779</td>\n",
353" <td>...</td>\n",
354" <td>NaN</td>\n",
355" <td>NaN</td>\n",
356" <td>And</td>\n",
357" <td>1</td>\n",
358" <td>3</td>\n",
359" <td>NaN</td>\n",
360" <td>386.901132</td>\n",
361" <td>NaN</td>\n",
362" <td>NaN</td>\n",
363" <td>NaN</td>\n",
364" </tr>\n",
365" <tr>\n",
366" <th>4</th>\n",
367" <td>4</td>\n",
368" <td>4.0</td>\n",
369" <td>224707.0</td>\n",
370" <td>NaN</td>\n",
371" <td>NaN</td>\n",
372" <td>NaN</td>\n",
373" <td>NaN</td>\n",
374" <td>0.000569</td>\n",
375" <td>-51.893546</td>\n",
376" <td>134.2282</td>\n",
377" <td>...</td>\n",
378" <td>NaN</td>\n",
379" <td>NaN</td>\n",
380" <td>Phe</td>\n",
381" <td>1</td>\n",
382" <td>4</td>\n",
383" <td>NaN</td>\n",
384" <td>9.366989</td>\n",
385" <td>NaN</td>\n",
386" <td>NaN</td>\n",
387" <td>NaN</td>\n",
388" </tr>\n",
389" <tr>\n",
390" <th>5</th>\n",
391" <td>5</td>\n",
392" <td>5.0</td>\n",
393" <td>224705.0</td>\n",
394" <td>NaN</td>\n",
395" <td>NaN</td>\n",
396" <td>NaN</td>\n",
397" <td>NaN</td>\n",
398" <td>0.000665</td>\n",
399" <td>-40.591202</td>\n",
400" <td>257.7320</td>\n",
401" <td>...</td>\n",
402" <td>NaN</td>\n",
403" <td>NaN</td>\n",
404" <td>Phe</td>\n",
405" <td>1</td>\n",
406" <td>5</td>\n",
407" <td>NaN</td>\n",
408" <td>21.998851</td>\n",
409" <td>NaN</td>\n",
410" <td>NaN</td>\n",
411" <td>NaN</td>\n",
412" </tr>\n",
413" </tbody>\n",
414"</table>\n",
415"<p>5 rows × 37 columns</p>\n",
416"</div>"
417],
418"text/plain": [
419" id hip hd hr gl bf proper ra dec dist \\\n",
420"1 1 1.0 224700.0 NaN NaN NaN NaN 0.000060 1.089009 219.7802 \n",
421"2 2 2.0 224690.0 NaN NaN NaN NaN 0.000283 -19.498840 47.9616 \n",
422"3 3 3.0 224699.0 NaN NaN NaN NaN 0.000335 38.859279 442.4779 \n",
423"4 4 4.0 224707.0 NaN NaN NaN NaN 0.000569 -51.893546 134.2282 \n",
424"5 5 5.0 224705.0 NaN NaN NaN NaN 0.000665 -40.591202 257.7320 \n",
425"\n",
426" ... bayer flam con comp comp_primary base lum var \\\n",
427"1 ... NaN NaN Psc 1 1 NaN 9.638290 NaN \n",
428"2 ... NaN NaN Cet 1 2 NaN 0.392283 NaN \n",
429"3 ... NaN NaN And 1 3 NaN 386.901132 NaN \n",
430"4 ... NaN NaN Phe 1 4 NaN 9.366989 NaN \n",
431"5 ... NaN NaN Phe 1 5 NaN 21.998851 NaN \n",
432"\n",
433" var_min var_max \n",
434"1 NaN NaN \n",
435"2 NaN NaN \n",
436"3 NaN NaN \n",
437"4 NaN NaN \n",
438"5 NaN NaN \n",
439"\n",
440"[5 rows x 37 columns]"
441]
442},
443"metadata": {},
444"output_type": "display_data"
445},
446{
447"name": "stdout",
448"output_type": "stream",
449"text": [
450"119613 total stars available in database\n",
451"8912 stars visible to the human eye\n"
452]
453}
454],
455"source": [
456"# Trim the HYG Database down to the stars that can be seen from Earth (magnitude <= 6.5)\n",
457"\n",
458"stars = pd.read_csv('./data/hygdata_v3/hygdata_v3.csv', low_memory=False)\n",
459"display(stars.head())\n",
460"\n",
461"# The sun has no place in a star chart, so drop its row before saving the full catalog\n",
462"stars = stars.loc[stars['proper'].ne('Sol')]\n",
463"display(stars.head())\n",
464"stars.to_csv('./data/processed/hygdata_processed.csv', index=False)\n",
465"\n",
466"print(len(stars), 'total stars available in database')\n",
467"df = stars.loc[stars['mag'].le(6.5)]\n",
468"print(len(df), 'stars visible to the human eye')\n",
469"df.to_csv('./data/processed/hygdata_processed_mag65.csv', index=False)"
470]
471},
472{
473"cell_type": "code",
474"execution_count": null,
475"metadata": {},
476"outputs": [],
477"source": []
478}
479],
480"metadata": {
481"kernelspec": {
482"display_name": "Python 3",
483"language": "python",
484"name": "python3"
485},
486"language_info": {
487"codemirror_mode": {
488"name": "ipython",
489"version": 3
490},
491"file_extension": ".py",
492"mimetype": "text/x-python",
493"name": "python",
494"nbconvert_exporter": "python",
495"pygments_lexer": "ipython3",
496"version": "3.7.1"
497}
498},
499"nbformat": 4,
500"nbformat_minor": 2
501}
502