atlas_of_worldstars

Форк
0
/
1_process_starbase_data.ipynb 
501 строка · 15.3 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import pandas as pd"
10
   ]
11
  },
12
  {
13
   "cell_type": "code",
14
   "execution_count": 2,
15
   "metadata": {},
16
   "outputs": [
17
    {
18
     "name": "stdout",
19
     "output_type": "stream",
20
     "text": [
21
      "watermark 1.8.1\n",
22
      "pandas    0.23.4\n",
23
      "ELEANOR LUTZ 2019-07-26 \n",
24
      "\n",
25
      "CPython 3.7.1\n",
26
      "IPython 7.2.0\n",
27
      "\n",
28
      "compiler   : MSC v.1900 64 bit (AMD64)\n",
29
      "system     : Windows\n",
30
      "release    : 10\n",
31
      "machine    : AMD64\n",
32
      "processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
33
      "CPU cores  : 12\n",
34
      "interpreter: 64bit\n"
35
     ]
36
    }
37
   ],
38
   "source": [
39
    "# Watermark is not required to run this notebook; it is included only to record the software versions used.\n",
40
    "import watermark\n",
41
    "%load_ext watermark\n",
42
    "%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
43
   ]
44
  },
45
  {
46
   "cell_type": "code",
47
   "execution_count": 3,
48
   "metadata": {},
49
   "outputs": [
50
    {
51
     "data": {
52
      "text/html": [
53
       "<div>\n",
54
       "<style scoped>\n",
55
       "    .dataframe tbody tr th:only-of-type {\n",
56
       "        vertical-align: middle;\n",
57
       "    }\n",
58
       "\n",
59
       "    .dataframe tbody tr th {\n",
60
       "        vertical-align: top;\n",
61
       "    }\n",
62
       "\n",
63
       "    .dataframe thead th {\n",
64
       "        text-align: right;\n",
65
       "    }\n",
66
       "</style>\n",
67
       "<table border=\"1\" class=\"dataframe\">\n",
68
       "  <thead>\n",
69
       "    <tr style=\"text-align: right;\">\n",
70
       "      <th></th>\n",
71
       "      <th>id</th>\n",
72
       "      <th>hip</th>\n",
73
       "      <th>hd</th>\n",
74
       "      <th>hr</th>\n",
75
       "      <th>gl</th>\n",
76
       "      <th>bf</th>\n",
77
       "      <th>proper</th>\n",
78
       "      <th>ra</th>\n",
79
       "      <th>dec</th>\n",
80
       "      <th>dist</th>\n",
81
       "      <th>...</th>\n",
82
       "      <th>bayer</th>\n",
83
       "      <th>flam</th>\n",
84
       "      <th>con</th>\n",
85
       "      <th>comp</th>\n",
86
       "      <th>comp_primary</th>\n",
87
       "      <th>base</th>\n",
88
       "      <th>lum</th>\n",
89
       "      <th>var</th>\n",
90
       "      <th>var_min</th>\n",
91
       "      <th>var_max</th>\n",
92
       "    </tr>\n",
93
       "  </thead>\n",
94
       "  <tbody>\n",
95
       "    <tr>\n",
96
       "      <th>0</th>\n",
97
       "      <td>0</td>\n",
98
       "      <td>NaN</td>\n",
99
       "      <td>NaN</td>\n",
100
       "      <td>NaN</td>\n",
101
       "      <td>NaN</td>\n",
102
       "      <td>NaN</td>\n",
103
       "      <td>Sol</td>\n",
104
       "      <td>0.000000</td>\n",
105
       "      <td>0.000000</td>\n",
106
       "      <td>0.0000</td>\n",
107
       "      <td>...</td>\n",
108
       "      <td>NaN</td>\n",
109
       "      <td>NaN</td>\n",
110
       "      <td>NaN</td>\n",
111
       "      <td>1</td>\n",
112
       "      <td>0</td>\n",
113
       "      <td>NaN</td>\n",
114
       "      <td>1.000000</td>\n",
115
       "      <td>NaN</td>\n",
116
       "      <td>NaN</td>\n",
117
       "      <td>NaN</td>\n",
118
       "    </tr>\n",
119
       "    <tr>\n",
120
       "      <th>1</th>\n",
121
       "      <td>1</td>\n",
122
       "      <td>1.0</td>\n",
123
       "      <td>224700.0</td>\n",
124
       "      <td>NaN</td>\n",
125
       "      <td>NaN</td>\n",
126
       "      <td>NaN</td>\n",
127
       "      <td>NaN</td>\n",
128
       "      <td>0.000060</td>\n",
129
       "      <td>1.089009</td>\n",
130
       "      <td>219.7802</td>\n",
131
       "      <td>...</td>\n",
132
       "      <td>NaN</td>\n",
133
       "      <td>NaN</td>\n",
134
       "      <td>Psc</td>\n",
135
       "      <td>1</td>\n",
136
       "      <td>1</td>\n",
137
       "      <td>NaN</td>\n",
138
       "      <td>9.638290</td>\n",
139
       "      <td>NaN</td>\n",
140
       "      <td>NaN</td>\n",
141
       "      <td>NaN</td>\n",
142
       "    </tr>\n",
143
       "    <tr>\n",
144
       "      <th>2</th>\n",
145
       "      <td>2</td>\n",
146
       "      <td>2.0</td>\n",
147
       "      <td>224690.0</td>\n",
148
       "      <td>NaN</td>\n",
149
       "      <td>NaN</td>\n",
150
       "      <td>NaN</td>\n",
151
       "      <td>NaN</td>\n",
152
       "      <td>0.000283</td>\n",
153
       "      <td>-19.498840</td>\n",
154
       "      <td>47.9616</td>\n",
155
       "      <td>...</td>\n",
156
       "      <td>NaN</td>\n",
157
       "      <td>NaN</td>\n",
158
       "      <td>Cet</td>\n",
159
       "      <td>1</td>\n",
160
       "      <td>2</td>\n",
161
       "      <td>NaN</td>\n",
162
       "      <td>0.392283</td>\n",
163
       "      <td>NaN</td>\n",
164
       "      <td>NaN</td>\n",
165
       "      <td>NaN</td>\n",
166
       "    </tr>\n",
167
       "    <tr>\n",
168
       "      <th>3</th>\n",
169
       "      <td>3</td>\n",
170
       "      <td>3.0</td>\n",
171
       "      <td>224699.0</td>\n",
172
       "      <td>NaN</td>\n",
173
       "      <td>NaN</td>\n",
174
       "      <td>NaN</td>\n",
175
       "      <td>NaN</td>\n",
176
       "      <td>0.000335</td>\n",
177
       "      <td>38.859279</td>\n",
178
       "      <td>442.4779</td>\n",
179
       "      <td>...</td>\n",
180
       "      <td>NaN</td>\n",
181
       "      <td>NaN</td>\n",
182
       "      <td>And</td>\n",
183
       "      <td>1</td>\n",
184
       "      <td>3</td>\n",
185
       "      <td>NaN</td>\n",
186
       "      <td>386.901132</td>\n",
187
       "      <td>NaN</td>\n",
188
       "      <td>NaN</td>\n",
189
       "      <td>NaN</td>\n",
190
       "    </tr>\n",
191
       "    <tr>\n",
192
       "      <th>4</th>\n",
193
       "      <td>4</td>\n",
194
       "      <td>4.0</td>\n",
195
       "      <td>224707.0</td>\n",
196
       "      <td>NaN</td>\n",
197
       "      <td>NaN</td>\n",
198
       "      <td>NaN</td>\n",
199
       "      <td>NaN</td>\n",
200
       "      <td>0.000569</td>\n",
201
       "      <td>-51.893546</td>\n",
202
       "      <td>134.2282</td>\n",
203
       "      <td>...</td>\n",
204
       "      <td>NaN</td>\n",
205
       "      <td>NaN</td>\n",
206
       "      <td>Phe</td>\n",
207
       "      <td>1</td>\n",
208
       "      <td>4</td>\n",
209
       "      <td>NaN</td>\n",
210
       "      <td>9.366989</td>\n",
211
       "      <td>NaN</td>\n",
212
       "      <td>NaN</td>\n",
213
       "      <td>NaN</td>\n",
214
       "    </tr>\n",
215
       "  </tbody>\n",
216
       "</table>\n",
217
       "<p>5 rows × 37 columns</p>\n",
218
       "</div>"
219
      ],
220
      "text/plain": [
221
       "   id  hip        hd  hr   gl   bf proper        ra        dec      dist  \\\n",
222
       "0   0  NaN       NaN NaN  NaN  NaN    Sol  0.000000   0.000000    0.0000   \n",
223
       "1   1  1.0  224700.0 NaN  NaN  NaN    NaN  0.000060   1.089009  219.7802   \n",
224
       "2   2  2.0  224690.0 NaN  NaN  NaN    NaN  0.000283 -19.498840   47.9616   \n",
225
       "3   3  3.0  224699.0 NaN  NaN  NaN    NaN  0.000335  38.859279  442.4779   \n",
226
       "4   4  4.0  224707.0 NaN  NaN  NaN    NaN  0.000569 -51.893546  134.2282   \n",
227
       "\n",
228
       "    ...     bayer  flam  con  comp  comp_primary base         lum  var  \\\n",
229
       "0   ...       NaN   NaN  NaN     1             0  NaN    1.000000  NaN   \n",
230
       "1   ...       NaN   NaN  Psc     1             1  NaN    9.638290  NaN   \n",
231
       "2   ...       NaN   NaN  Cet     1             2  NaN    0.392283  NaN   \n",
232
       "3   ...       NaN   NaN  And     1             3  NaN  386.901132  NaN   \n",
233
       "4   ...       NaN   NaN  Phe     1             4  NaN    9.366989  NaN   \n",
234
       "\n",
235
       "   var_min  var_max  \n",
236
       "0      NaN      NaN  \n",
237
       "1      NaN      NaN  \n",
238
       "2      NaN      NaN  \n",
239
       "3      NaN      NaN  \n",
240
       "4      NaN      NaN  \n",
241
       "\n",
242
       "[5 rows x 37 columns]"
243
      ]
244
     },
245
     "metadata": {},
246
     "output_type": "display_data"
247
    },
248
    {
249
     "data": {
250
      "text/html": [
251
       "<div>\n",
252
       "<style scoped>\n",
253
       "    .dataframe tbody tr th:only-of-type {\n",
254
       "        vertical-align: middle;\n",
255
       "    }\n",
256
       "\n",
257
       "    .dataframe tbody tr th {\n",
258
       "        vertical-align: top;\n",
259
       "    }\n",
260
       "\n",
261
       "    .dataframe thead th {\n",
262
       "        text-align: right;\n",
263
       "    }\n",
264
       "</style>\n",
265
       "<table border=\"1\" class=\"dataframe\">\n",
266
       "  <thead>\n",
267
       "    <tr style=\"text-align: right;\">\n",
268
       "      <th></th>\n",
269
       "      <th>id</th>\n",
270
       "      <th>hip</th>\n",
271
       "      <th>hd</th>\n",
272
       "      <th>hr</th>\n",
273
       "      <th>gl</th>\n",
274
       "      <th>bf</th>\n",
275
       "      <th>proper</th>\n",
276
       "      <th>ra</th>\n",
277
       "      <th>dec</th>\n",
278
       "      <th>dist</th>\n",
279
       "      <th>...</th>\n",
280
       "      <th>bayer</th>\n",
281
       "      <th>flam</th>\n",
282
       "      <th>con</th>\n",
283
       "      <th>comp</th>\n",
284
       "      <th>comp_primary</th>\n",
285
       "      <th>base</th>\n",
286
       "      <th>lum</th>\n",
287
       "      <th>var</th>\n",
288
       "      <th>var_min</th>\n",
289
       "      <th>var_max</th>\n",
290
       "    </tr>\n",
291
       "  </thead>\n",
292
       "  <tbody>\n",
293
       "    <tr>\n",
294
       "      <th>1</th>\n",
295
       "      <td>1</td>\n",
296
       "      <td>1.0</td>\n",
297
       "      <td>224700.0</td>\n",
298
       "      <td>NaN</td>\n",
299
       "      <td>NaN</td>\n",
300
       "      <td>NaN</td>\n",
301
       "      <td>NaN</td>\n",
302
       "      <td>0.000060</td>\n",
303
       "      <td>1.089009</td>\n",
304
       "      <td>219.7802</td>\n",
305
       "      <td>...</td>\n",
306
       "      <td>NaN</td>\n",
307
       "      <td>NaN</td>\n",
308
       "      <td>Psc</td>\n",
309
       "      <td>1</td>\n",
310
       "      <td>1</td>\n",
311
       "      <td>NaN</td>\n",
312
       "      <td>9.638290</td>\n",
313
       "      <td>NaN</td>\n",
314
       "      <td>NaN</td>\n",
315
       "      <td>NaN</td>\n",
316
       "    </tr>\n",
317
       "    <tr>\n",
318
       "      <th>2</th>\n",
319
       "      <td>2</td>\n",
320
       "      <td>2.0</td>\n",
321
       "      <td>224690.0</td>\n",
322
       "      <td>NaN</td>\n",
323
       "      <td>NaN</td>\n",
324
       "      <td>NaN</td>\n",
325
       "      <td>NaN</td>\n",
326
       "      <td>0.000283</td>\n",
327
       "      <td>-19.498840</td>\n",
328
       "      <td>47.9616</td>\n",
329
       "      <td>...</td>\n",
330
       "      <td>NaN</td>\n",
331
       "      <td>NaN</td>\n",
332
       "      <td>Cet</td>\n",
333
       "      <td>1</td>\n",
334
       "      <td>2</td>\n",
335
       "      <td>NaN</td>\n",
336
       "      <td>0.392283</td>\n",
337
       "      <td>NaN</td>\n",
338
       "      <td>NaN</td>\n",
339
       "      <td>NaN</td>\n",
340
       "    </tr>\n",
341
       "    <tr>\n",
342
       "      <th>3</th>\n",
343
       "      <td>3</td>\n",
344
       "      <td>3.0</td>\n",
345
       "      <td>224699.0</td>\n",
346
       "      <td>NaN</td>\n",
347
       "      <td>NaN</td>\n",
348
       "      <td>NaN</td>\n",
349
       "      <td>NaN</td>\n",
350
       "      <td>0.000335</td>\n",
351
       "      <td>38.859279</td>\n",
352
       "      <td>442.4779</td>\n",
353
       "      <td>...</td>\n",
354
       "      <td>NaN</td>\n",
355
       "      <td>NaN</td>\n",
356
       "      <td>And</td>\n",
357
       "      <td>1</td>\n",
358
       "      <td>3</td>\n",
359
       "      <td>NaN</td>\n",
360
       "      <td>386.901132</td>\n",
361
       "      <td>NaN</td>\n",
362
       "      <td>NaN</td>\n",
363
       "      <td>NaN</td>\n",
364
       "    </tr>\n",
365
       "    <tr>\n",
366
       "      <th>4</th>\n",
367
       "      <td>4</td>\n",
368
       "      <td>4.0</td>\n",
369
       "      <td>224707.0</td>\n",
370
       "      <td>NaN</td>\n",
371
       "      <td>NaN</td>\n",
372
       "      <td>NaN</td>\n",
373
       "      <td>NaN</td>\n",
374
       "      <td>0.000569</td>\n",
375
       "      <td>-51.893546</td>\n",
376
       "      <td>134.2282</td>\n",
377
       "      <td>...</td>\n",
378
       "      <td>NaN</td>\n",
379
       "      <td>NaN</td>\n",
380
       "      <td>Phe</td>\n",
381
       "      <td>1</td>\n",
382
       "      <td>4</td>\n",
383
       "      <td>NaN</td>\n",
384
       "      <td>9.366989</td>\n",
385
       "      <td>NaN</td>\n",
386
       "      <td>NaN</td>\n",
387
       "      <td>NaN</td>\n",
388
       "    </tr>\n",
389
       "    <tr>\n",
390
       "      <th>5</th>\n",
391
       "      <td>5</td>\n",
392
       "      <td>5.0</td>\n",
393
       "      <td>224705.0</td>\n",
394
       "      <td>NaN</td>\n",
395
       "      <td>NaN</td>\n",
396
       "      <td>NaN</td>\n",
397
       "      <td>NaN</td>\n",
398
       "      <td>0.000665</td>\n",
399
       "      <td>-40.591202</td>\n",
400
       "      <td>257.7320</td>\n",
401
       "      <td>...</td>\n",
402
       "      <td>NaN</td>\n",
403
       "      <td>NaN</td>\n",
404
       "      <td>Phe</td>\n",
405
       "      <td>1</td>\n",
406
       "      <td>5</td>\n",
407
       "      <td>NaN</td>\n",
408
       "      <td>21.998851</td>\n",
409
       "      <td>NaN</td>\n",
410
       "      <td>NaN</td>\n",
411
       "      <td>NaN</td>\n",
412
       "    </tr>\n",
413
       "  </tbody>\n",
414
       "</table>\n",
415
       "<p>5 rows × 37 columns</p>\n",
416
       "</div>"
417
      ],
418
      "text/plain": [
419
       "   id  hip        hd  hr   gl   bf proper        ra        dec      dist  \\\n",
420
       "1   1  1.0  224700.0 NaN  NaN  NaN    NaN  0.000060   1.089009  219.7802   \n",
421
       "2   2  2.0  224690.0 NaN  NaN  NaN    NaN  0.000283 -19.498840   47.9616   \n",
422
       "3   3  3.0  224699.0 NaN  NaN  NaN    NaN  0.000335  38.859279  442.4779   \n",
423
       "4   4  4.0  224707.0 NaN  NaN  NaN    NaN  0.000569 -51.893546  134.2282   \n",
424
       "5   5  5.0  224705.0 NaN  NaN  NaN    NaN  0.000665 -40.591202  257.7320   \n",
425
       "\n",
426
       "    ...     bayer  flam  con  comp  comp_primary base         lum  var  \\\n",
427
       "1   ...       NaN   NaN  Psc     1             1  NaN    9.638290  NaN   \n",
428
       "2   ...       NaN   NaN  Cet     1             2  NaN    0.392283  NaN   \n",
429
       "3   ...       NaN   NaN  And     1             3  NaN  386.901132  NaN   \n",
430
       "4   ...       NaN   NaN  Phe     1             4  NaN    9.366989  NaN   \n",
431
       "5   ...       NaN   NaN  Phe     1             5  NaN   21.998851  NaN   \n",
432
       "\n",
433
       "   var_min  var_max  \n",
434
       "1      NaN      NaN  \n",
435
       "2      NaN      NaN  \n",
436
       "3      NaN      NaN  \n",
437
       "4      NaN      NaN  \n",
438
       "5      NaN      NaN  \n",
439
       "\n",
440
       "[5 rows x 37 columns]"
441
      ]
442
     },
443
     "metadata": {},
444
     "output_type": "display_data"
445
    },
446
    {
447
     "name": "stdout",
448
     "output_type": "stream",
449
     "text": [
450
      "119613 total stars available in database\n",
451
      "8912 stars visible to the human eye\n"
452
     ]
453
    }
454
   ],
455
   "source": [
456
    "# Load the HYG Database, drop the Sun, and save both the full catalog and a subset clipped to stars visible to the human eye (magnitude <= 6.5)\n",
457
    "\n",
458
    "df = pd.read_csv('./data/hygdata_v3/hygdata_v3.csv', low_memory=False)\n",
459
    "display(df.head())\n",
460
    "\n",
461
    "# Remove the Sun (listed as 'Sol' in the 'proper' column) because it doesn't make sense in a star chart\n",
462
    "df = df[df['proper'] != 'Sol']\n",
463
    "display(df.head())\n",
464
    "df.to_csv('./data/processed/hygdata_processed.csv', index=False)\n",
465
    "\n",
466
    "print(len(df), 'total stars available in database')\n",
467
    "df = df[df['mag'] <= 6.5]\n",
468
    "print(len(df), 'stars visible to the human eye')\n",
469
    "df.to_csv('./data/processed/hygdata_processed_mag65.csv', index=False)"
470
   ]
471
  },
472
  {
473
   "cell_type": "code",
474
   "execution_count": null,
475
   "metadata": {},
476
   "outputs": [],
477
   "source": []
478
  }
479
 ],
480
 "metadata": {
481
  "kernelspec": {
482
   "display_name": "Python 3",
483
   "language": "python",
484
   "name": "python3"
485
  },
486
  "language_info": {
487
   "codemirror_mode": {
488
    "name": "ipython",
489
    "version": 3
490
   },
491
   "file_extension": ".py",
492
   "mimetype": "text/x-python",
493
   "name": "python",
494
   "nbconvert_exporter": "python",
495
   "pygments_lexer": "ipython3",
496
   "version": "3.7.1"
497
  }
498
 },
499
 "nbformat": 4,
500
 "nbformat_minor": 2
501
}
502

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.