atlas_of_worldstars

3_process_asterism_names.ipynb
417 строк · 13.6 Кб
Перенос по словам
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import pandas as pd\n",
10
    "import numpy as np\n",
11
    "import glob"
12
   ]
13
  },
14
  {
15
   "cell_type": "code",
16
   "execution_count": 2,
17
   "metadata": {},
18
   "outputs": [
19
    {
20
     "name": "stdout",
21
     "output_type": "stream",
22
     "text": [
23
      "watermark 1.8.1\n",
24
      "numpy     1.15.4\n",
25
      "pandas    0.23.4\n",
26
      "ELEANOR LUTZ 2019-07-26 \n",
27
      "\n",
28
      "CPython 3.7.1\n",
29
      "IPython 7.2.0\n",
30
      "\n",
31
      "compiler   : MSC v.1900 64 bit (AMD64)\n",
32
      "system     : Windows\n",
33
      "release    : 10\n",
34
      "machine    : AMD64\n",
35
      "processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
36
      "CPU cores  : 12\n",
37
      "interpreter: 64bit\n"
38
     ]
39
    }
40
   ],
41
   "source": [
42
    "# Watermark is not required for this code, but is included for information. \n",
43
    "import watermark\n",
44
    "%load_ext watermark\n",
45
    "%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
46
   ]
47
  },
48
  {
49
   "cell_type": "code",
50
   "execution_count": 3,
51
   "metadata": {},
52
   "outputs": [
53
    {
54
     "name": "stdout",
55
     "output_type": "stream",
56
     "text": [
57
      "['arabic', 'arabic_moon_stations', 'armintxe', 'aztec', 'belarusian', 'boorong', 'chinese_medieval', 'dakota', 'egyptian', 'hawaiian_starlines', 'indian', 'inuit', 'japanese_moon_stations', 'kamilaroi', 'korean', 'lokono', 'macedonian', 'maori', 'maya', 'mongolian', 'mulapin', 'navajo', 'norse', 'northern_andes', 'ojibwe', 'romanian', 'sami', 'sardinian', 'seleucid', 'siberian', 'tongan', 'tukano', 'tupi', 'western']\n"
58
     ]
59
    }
60
   ],
61
   "source": [
62
    "# Read in processed star data (created in 1_process_starbase_data.ipynb)\n",
63
    "hip_df = pd.read_csv(\"./data/processed/hygdata_processed.csv\", low_memory=False)\n",
64
    "\n",
65
    "# Exclude \"culture\" folders that are replicated in other folders. \n",
66
    "# \"chinese_medieval\" and \"western\" are used instead of the alternates in the list below (\"exclude\") \n",
67
    "exclude = ['western_SnT', 'western_hlad', 'western_rey', 'chinese_contemporary', 'chinese']\n",
68
    "# Keep only each culture folder's own name; separators are normalised so both '\\\\' and '/' paths work\n",
69
    "fabfiles = [x.replace(\"\\\\\", \"/\").rstrip(\"/\").split(\"/\")[-1] for x in glob.glob('./data/skycultures/*/')]\n",
70
    "fabfiles = sorted(x for x in fabfiles if x not in exclude)  # sorted: glob order is filesystem-dependent\n",
71
    "\n",
72
    "# Print all cultures that will be analyzed in this Jupyter notebook\n",
73
    "print(fabfiles)"
74
   ]
75
  },
76
  {
77
   "cell_type": "code",
78
   "execution_count": 4,
79
   "metadata": {
80
    "scrolled": false
81
   },
82
   "outputs": [
83
    {
84
     "data": {
85
      "text/html": [
86
       "<div>\n",
87
       "<style scoped>\n",
88
       "    .dataframe tbody tr th:only-of-type {\n",
89
       "        vertical-align: middle;\n",
90
       "    }\n",
91
       "\n",
92
       "    .dataframe tbody tr th {\n",
93
       "        vertical-align: top;\n",
94
       "    }\n",
95
       "\n",
96
       "    .dataframe thead th {\n",
97
       "        text-align: right;\n",
98
       "    }\n",
99
       "</style>\n",
100
       "<table border=\"1\" class=\"dataframe\">\n",
101
       "  <thead>\n",
102
       "    <tr style=\"text-align: right;\">\n",
103
       "      <th></th>\n",
104
       "      <th>num_pairs</th>\n",
105
       "      <th>star</th>\n",
106
       "      <th>name</th>\n",
107
       "      <th>ra</th>\n",
108
       "      <th>dec</th>\n",
109
       "      <th>culture</th>\n",
110
       "    </tr>\n",
111
       "  </thead>\n",
112
       "  <tbody>\n",
113
       "    <tr>\n",
114
       "      <th>40</th>\n",
115
       "      <td>9</td>\n",
116
       "      <td>114131</td>\n",
117
       "      <td>Grus</td>\n",
118
       "      <td>23.1147</td>\n",
119
       "      <td>-43.5204</td>\n",
120
       "      <td>western</td>\n",
121
       "    </tr>\n",
122
       "    <tr>\n",
123
       "      <th>9</th>\n",
124
       "      <td>9</td>\n",
125
       "      <td>71795</td>\n",
126
       "      <td>Bootes</td>\n",
127
       "      <td>14.6858</td>\n",
128
       "      <td>13.7283</td>\n",
129
       "      <td>western</td>\n",
130
       "    </tr>\n",
131
       "    <tr>\n",
132
       "      <th>28</th>\n",
133
       "      <td>9</td>\n",
134
       "      <td>94779</td>\n",
135
       "      <td>Cygnus</td>\n",
136
       "      <td>19.285</td>\n",
137
       "      <td>53.3685</td>\n",
138
       "      <td>western</td>\n",
139
       "    </tr>\n",
140
       "    <tr>\n",
141
       "      <th>24</th>\n",
142
       "      <td>9</td>\n",
143
       "      <td>91875</td>\n",
144
       "      <td>Corona Australis</td>\n",
145
       "      <td>18.7297</td>\n",
146
       "      <td>-38.3234</td>\n",
147
       "      <td>western</td>\n",
148
       "    </tr>\n",
149
       "    <tr>\n",
150
       "      <th>23</th>\n",
151
       "      <td>9</td>\n",
152
       "      <td>53740</td>\n",
153
       "      <td>Crater</td>\n",
154
       "      <td>10.9962</td>\n",
155
       "      <td>-18.2988</td>\n",
156
       "      <td>western</td>\n",
157
       "    </tr>\n",
158
       "  </tbody>\n",
159
       "</table>\n",
160
       "</div>"
161
      ],
162
      "text/plain": [
163
       "   num_pairs    star              name       ra      dec  culture\n",
164
       "40         9  114131              Grus  23.1147 -43.5204  western\n",
165
       "9          9   71795            Bootes  14.6858  13.7283  western\n",
166
       "28         9   94779            Cygnus   19.285  53.3685  western\n",
167
       "24         9   91875  Corona Australis  18.7297 -38.3234  western\n",
168
       "23         9   53740            Crater  10.9962 -18.2988  western"
169
      ]
170
     },
171
     "metadata": {},
172
     "output_type": "display_data"
173
    }
174
   ],
175
   "source": [
176
    "'''\n",
177
    "Make a table (df) of all named asterisms, as well as the RA/DEC location information\n",
178
    "(this code just uses one of the stars as the plotting location).\n",
179
    "The num_pairs column describes how many lines (star pairs) are included in the asterism. \n",
180
    "The result is saved as a separate file for each culture, sorted by num_pairs (largest first). \n",
181
    "'''\n",
182
    "\n",
183
    "for name in fabfiles: \n",
184
    "    fname = './data/skycultures/'+name+'/constellationship.fab'\n",
185
    "    # The first three whitespace-separated tokens of each row are read as: constellation id, num_pairs, first star ID\n",
186
    "    df = pd.read_csv(fname, header=None, encoding='utf-8')\n",
187
    "    df['constellation'] = df[0].str.split().str.get(0).astype(str)\n",
188
    "    df['num_pairs'] = df[0].str.split().str.get(1).astype(int)  # int, so the sort below is numeric rather than lexicographic\n",
189
    "    df['star'] = df[0].str.split().str[2]\n",
190
    "    df.drop(0, axis=1, inplace=True)\n",
191
    "\n",
192
    "    # Merge by names of constellations\n",
193
    "    lines = []\n",
194
    "    if name == 'chinese_medieval': \n",
195
    "        # names available in original language characters\n",
196
    "        readname = './data/skycultures/'+name+'/constellation_names.zh_CN.fab'\n",
197
    "    else: \n",
198
    "        readname = './data/skycultures/'+name+'/constellation_names.eng.fab'\n",
199
    "    with open(readname, encoding='utf-8') as f: \n",
200
    "        for line in f: \n",
201
    "            if line[0] != '#':\n",
202
    "                lines.append(str(line))\n",
203
    "    newlines = []\n",
204
    "    for line in lines:\n",
205
    "        line = line.replace(\"\\n\",\"\").replace(\"_\",\"\").split('\"')\n",
206
    "        line[0] = line[0].strip()\n",
207
    "        if len(line[0]) != 0:\n",
208
    "            if len(line[1]) == 0:\n",
209
    "                line[1] = line[3]  # first quoted field is empty: fall back to the second quoted field\n",
210
    "            newlines.append(line)\n",
211
    "        \n",
212
    "    cs = [x[0] for x in newlines]\n",
213
    "    ns = [x[1] for x in newlines]\n",
214
    "    df_names = pd.DataFrame.from_dict({\"constellation\":cs, \"name\":ns})\n",
215
    "    df = pd.merge(df, df_names, on=\"constellation\", how='left')\n",
216
    "\n",
217
    "    # Merge star locations back into original database\n",
218
    "    df['ra'] = ''\n",
219
    "    df['dec'] = ''\n",
220
    "    for index, row in df.iterrows(): \n",
221
    "        star = row['star']\n",
222
    "        # Manually fix problematic star IDs\n",
223
    "        # SOURCE: Hipparcos catalog. Mapped to HD ID identifier. \n",
224
    "        # http://tdc-www.harvard.edu/catalogs/hipparcos.html\n",
225
    "        # https://www.cosmos.esa.int/web/hipparcos/search-facility\n",
226
    "        if star == '78727':\n",
227
    "            temp = hip_df[hip_df['hd']== 144069.]\n",
228
    "        else:\n",
229
    "            temp = hip_df[hip_df['hip']==float(star)]\n",
230
    "        if len(temp) != 1:\n",
231
    "            print(star, 'has issues in merging with HIP catalogue')\n",
232
    "            display(temp.head())\n",
233
    "        df.at[index, 'ra'] = temp['ra'].tolist()[0]\n",
234
    "        df.at[index, 'dec'] = temp['dec'].tolist()[0]\n",
235
    "\n",
236
    "    df['culture'] = name\n",
237
    "    df.drop('constellation', axis=1, inplace=True)\n",
238
    "    df.sort_values(by='num_pairs', ascending=False, inplace=True)\n",
239
    "    df.to_csv('./data/processed/skycultures/asterism_names/'+name+'_names.csv', index=False)\n",
240
    "    \n",
241
    "display(df.head())"
242
   ]
243
  },
244
  {
245
   "cell_type": "code",
246
   "execution_count": 5,
247
   "metadata": {},
248
   "outputs": [
249
    {
250
     "name": "stdout",
251
     "output_type": "stream",
252
     "text": [
253
      "Total of 1270 named asterisms from 34 cultures\n"
254
     ]
255
    },
256
    {
257
     "data": {
258
      "text/html": [
259
       "<div>\n",
260
       "<style scoped>\n",
261
       "    .dataframe tbody tr th:only-of-type {\n",
262
       "        vertical-align: middle;\n",
263
       "    }\n",
264
       "\n",
265
       "    .dataframe tbody tr th {\n",
266
       "        vertical-align: top;\n",
267
       "    }\n",
268
       "\n",
269
       "    .dataframe thead th {\n",
270
       "        text-align: right;\n",
271
       "    }\n",
272
       "</style>\n",
273
       "<table border=\"1\" class=\"dataframe\">\n",
274
       "  <thead>\n",
275
       "    <tr style=\"text-align: right;\">\n",
276
       "      <th></th>\n",
277
       "      <th>num_pairs</th>\n",
278
       "      <th>star</th>\n",
279
       "      <th>name</th>\n",
280
       "      <th>ra</th>\n",
281
       "      <th>dec</th>\n",
282
       "      <th>culture</th>\n",
283
       "      <th>color</th>\n",
284
       "    </tr>\n",
285
       "  </thead>\n",
286
       "  <tbody>\n",
287
       "    <tr>\n",
288
       "      <th>3</th>\n",
289
       "      <td>58</td>\n",
290
       "      <td>17772</td>\n",
291
       "      <td>Yai</td>\n",
292
       "      <td>3.805021</td>\n",
293
       "      <td>50.736767</td>\n",
294
       "      <td>tukano</td>\n",
295
       "      <td>#ae7b3a</td>\n",
296
       "    </tr>\n",
297
       "    <tr>\n",
298
       "      <th>5</th>\n",
299
       "      <td>43</td>\n",
300
       "      <td>78820</td>\n",
301
       "      <td>Aña</td>\n",
302
       "      <td>16.090620</td>\n",
303
       "      <td>-19.805453</td>\n",
304
       "      <td>tukano</td>\n",
305
       "      <td>#ae7b3a</td>\n",
306
       "    </tr>\n",
307
       "    <tr>\n",
308
       "      <th>2</th>\n",
309
       "      <td>43</td>\n",
310
       "      <td>78820</td>\n",
311
       "      <td>Ema (Guira-nhandu)</td>\n",
312
       "      <td>16.090620</td>\n",
313
       "      <td>-19.805453</td>\n",
314
       "      <td>tupi</td>\n",
315
       "      <td>#d23958</td>\n",
316
       "    </tr>\n",
317
       "    <tr>\n",
318
       "      <th>3</th>\n",
319
       "      <td>42</td>\n",
320
       "      <td>60718</td>\n",
321
       "      <td>Veado</td>\n",
322
       "      <td>12.443311</td>\n",
323
       "      <td>-63.099092</td>\n",
324
       "      <td>tupi</td>\n",
325
       "      <td>#d23958</td>\n",
326
       "    </tr>\n",
327
       "    <tr>\n",
328
       "      <th>92</th>\n",
329
       "      <td>39</td>\n",
330
       "      <td>1170</td>\n",
331
       "      <td>URimGuun</td>\n",
332
       "      <td>0.244005</td>\n",
333
       "      <td>-18.932866</td>\n",
334
       "      <td>korean</td>\n",
335
       "      <td>#2f8cae</td>\n",
336
       "    </tr>\n",
337
       "  </tbody>\n",
338
       "</table>\n",
339
       "</div>"
340
      ],
341
      "text/plain": [
342
       "    num_pairs   star                name         ra        dec culture  \\\n",
343
       "3          58  17772                 Yai   3.805021  50.736767  tukano   \n",
344
       "5          43  78820                 Aña  16.090620 -19.805453  tukano   \n",
345
       "2          43  78820  Ema (Guira-nhandu)  16.090620 -19.805453    tupi   \n",
346
       "3          42  60718               Veado  12.443311 -63.099092    tupi   \n",
347
       "92         39   1170            URimGuun   0.244005 -18.932866  korean   \n",
348
       "\n",
349
       "      color  \n",
350
       "3   #ae7b3a  \n",
351
       "5   #ae7b3a  \n",
352
       "2   #d23958  \n",
353
       "3   #d23958  \n",
354
       "92  #2f8cae  "
355
      ]
356
     },
357
     "metadata": {},
358
     "output_type": "display_data"
359
    }
360
   ],
361
   "source": [
362
    "'''\n",
363
    "Map each named asterism to the correct color using the colormap.csv file. \n",
364
    "Combine data from all cultures into one dataframe. \n",
365
    "'''\n",
366
    "\n",
367
    "names = sorted(glob.glob('./data/processed/skycultures/asterism_names/*_names.csv'))\n",
368
    "colors = pd.read_csv(\"./data/processed/colormap.csv\")\n",
369
    "\n",
370
    "frames = []  # one table per culture; concatenated once after the loop instead of re-copying on every iteration\n",
371
    "for name in names:\n",
372
    "    temp = pd.read_csv(name, encoding=\"utf-8\")\n",
373
    "    culture = temp['culture'].unique()\n",
374
    "    assert len(culture) == 1\n",
375
    "    culture = culture[0]\n",
376
    "    color = colors[colors['culture'] == culture]['color'].tolist()\n",
377
    "    assert len(color) == 1\n",
378
    "    color = color[0]\n",
379
    "    temp['color'] = color\n",
380
    "    frames.append(temp)\n",
381
    "namedf = pd.concat(frames)\n",
382
    "namedf.sort_values(by='num_pairs', ascending=False, inplace=True)    \n",
383
    "print('Total of', len(namedf), 'named asterisms from', len(namedf.culture.unique()), 'cultures')\n",
384
    "display(namedf.head())\n",
385
    "namedf.to_csv('./data/processed/named_asterisms_to_plot.csv', index=False)"
386
   ]
387
  },
388
  {
389
   "cell_type": "code",
390
   "execution_count": null,
391
   "metadata": {},
392
   "outputs": [],
393
   "source": []
394
  }
395
 ],
396
 "metadata": {
397
  "kernelspec": {
398
   "display_name": "Python 3",
399
   "language": "python",
400
   "name": "python3"
401
  },
402
  "language_info": {
403
   "codemirror_mode": {
404
    "name": "ipython",
405
    "version": 3
406
   },
407
   "file_extension": ".py",
408
   "mimetype": "text/x-python",
409
   "name": "python",
410
   "nbconvert_exporter": "python",
411
   "pygments_lexer": "ipython3",
412
   "version": "3.7.1"
413
  }
414
 },
415
 "nbformat": 4,
416
 "nbformat_minor": 2
417
}
418
atlas_of_worldstars

Использование cookies