atlas_of_worldstars
/
3_process_asterism_names.ipynb
417 строк · 13.6 Кб
1{
2"cells": [
3{
4"cell_type": "code",
5"execution_count": 1,
6"metadata": {},
7"outputs": [],
8"source": [
9"import pandas as pd\n",
10"import numpy as np\n",
11"import glob"
12]
13},
14{
15"cell_type": "code",
16"execution_count": 2,
17"metadata": {},
18"outputs": [
19{
20"name": "stdout",
21"output_type": "stream",
22"text": [
23"watermark 1.8.1\n",
24"numpy 1.15.4\n",
25"pandas 0.23.4\n",
26"ELEANOR LUTZ 2019-07-26 \n",
27"\n",
28"CPython 3.7.1\n",
29"IPython 7.2.0\n",
30"\n",
31"compiler : MSC v.1900 64 bit (AMD64)\n",
32"system : Windows\n",
33"release : 10\n",
34"machine : AMD64\n",
35"processor : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
36"CPU cores : 12\n",
37"interpreter: 64bit\n"
38]
39}
40],
41"source": [
42"# Watermark is not required for this code; it only prints the author, date, Python/package versions and machine details for information.\n",
43"import watermark\n",
44"%load_ext watermark\n",
45"%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
46]
47},
48{
49"cell_type": "code",
50"execution_count": 3,
51"metadata": {},
52"outputs": [
53{
54"name": "stdout",
55"output_type": "stream",
56"text": [
57"['arabic', 'arabic_moon_stations', 'armintxe', 'aztec', 'belarusian', 'boorong', 'chinese_medieval', 'dakota', 'egyptian', 'hawaiian_starlines', 'indian', 'inuit', 'japanese_moon_stations', 'kamilaroi', 'korean', 'lokono', 'macedonian', 'maori', 'maya', 'mongolian', 'mulapin', 'navajo', 'norse', 'northern_andes', 'ojibwe', 'romanian', 'sami', 'sardinian', 'seleucid', 'siberian', 'tongan', 'tukano', 'tupi', 'western']\n"
58]
59}
60],
61"source": [
62"# Read in processed star data (created in 1_process_starbase_data.ipynb)\n",
63"hip_df = pd.read_csv(\"./data/processed/hygdata_processed.csv\", low_memory=False)\n",
64"\n",
65"# Exclude \"culture\" folders that are replicated in other folders. \n",
66"# \"chinese_medieval\" and \"western\" are used instead of the alternates in the list below (\"exclude\") \n",
67"exclude = ['western_SnT', 'western_hlad', 'western_rey', 'chinese_contemporary', 'chinese']\n",
68"# Keep only the folder name of each match; paths may use backslash (Windows) or forward-slash separators\n",
69"fabfiles = [x.replace(\"\\\\\", \"/\").rstrip(\"/\").split(\"/\")[-1] for x in glob.glob('./data/skycultures/*/')]\n",
70"fabfiles = sorted(x for x in fabfiles if x not in exclude)  # sorted: glob order is not guaranteed\n",
71"\n",
72"# Print all cultures that will be analyzed in this Jupyter notebook\n",
73"print(fabfiles)"
74]
75},
76{
77"cell_type": "code",
78"execution_count": 4,
79"metadata": {
80"scrolled": false
81},
82"outputs": [
83{
84"data": {
85"text/html": [
86"<div>\n",
87"<style scoped>\n",
88" .dataframe tbody tr th:only-of-type {\n",
89" vertical-align: middle;\n",
90" }\n",
91"\n",
92" .dataframe tbody tr th {\n",
93" vertical-align: top;\n",
94" }\n",
95"\n",
96" .dataframe thead th {\n",
97" text-align: right;\n",
98" }\n",
99"</style>\n",
100"<table border=\"1\" class=\"dataframe\">\n",
101" <thead>\n",
102" <tr style=\"text-align: right;\">\n",
103" <th></th>\n",
104" <th>num_pairs</th>\n",
105" <th>star</th>\n",
106" <th>name</th>\n",
107" <th>ra</th>\n",
108" <th>dec</th>\n",
109" <th>culture</th>\n",
110" </tr>\n",
111" </thead>\n",
112" <tbody>\n",
113" <tr>\n",
114" <th>40</th>\n",
115" <td>9</td>\n",
116" <td>114131</td>\n",
117" <td>Grus</td>\n",
118" <td>23.1147</td>\n",
119" <td>-43.5204</td>\n",
120" <td>western</td>\n",
121" </tr>\n",
122" <tr>\n",
123" <th>9</th>\n",
124" <td>9</td>\n",
125" <td>71795</td>\n",
126" <td>Bootes</td>\n",
127" <td>14.6858</td>\n",
128" <td>13.7283</td>\n",
129" <td>western</td>\n",
130" </tr>\n",
131" <tr>\n",
132" <th>28</th>\n",
133" <td>9</td>\n",
134" <td>94779</td>\n",
135" <td>Cygnus</td>\n",
136" <td>19.285</td>\n",
137" <td>53.3685</td>\n",
138" <td>western</td>\n",
139" </tr>\n",
140" <tr>\n",
141" <th>24</th>\n",
142" <td>9</td>\n",
143" <td>91875</td>\n",
144" <td>Corona Australis</td>\n",
145" <td>18.7297</td>\n",
146" <td>-38.3234</td>\n",
147" <td>western</td>\n",
148" </tr>\n",
149" <tr>\n",
150" <th>23</th>\n",
151" <td>9</td>\n",
152" <td>53740</td>\n",
153" <td>Crater</td>\n",
154" <td>10.9962</td>\n",
155" <td>-18.2988</td>\n",
156" <td>western</td>\n",
157" </tr>\n",
158" </tbody>\n",
159"</table>\n",
160"</div>"
161],
162"text/plain": [
163" num_pairs star name ra dec culture\n",
164"40 9 114131 Grus 23.1147 -43.5204 western\n",
165"9 9 71795 Bootes 14.6858 13.7283 western\n",
166"28 9 94779 Cygnus 19.285 53.3685 western\n",
167"24 9 91875 Corona Australis 18.7297 -38.3234 western\n",
168"23 9 53740 Crater 10.9962 -18.2988 western"
169]
170},
171"metadata": {},
172"output_type": "display_data"
173}
174],
175"source": [
176"'''\n",
177"Make a table (df) of all named asterisms, as well as the RA/DEC location information\n",
178"(this code just uses one of the stars as the plotting location).\n",
179"The num_pairs column describes how many lines (star pairs) are included in the asterism. \n",
180"The result is saved as a separate file for each culture, sorted by num_pairs (largest first). \n",
181"'''\n",
182"\n",
183"for name in fabfiles:\n",
184"    fname = './data/skycultures/'+name+'/constellationship.fab'\n",
185"    df = pd.read_csv(fname, header=None, encoding='utf-8')\n",
186"    parts = df[0].str.split()  # whitespace-separated fields of each row, split once\n",
187"    df['constellation'] = parts.str.get(0).astype(str)\n",
188"    df['num_pairs'] = pd.to_numeric(parts.str.get(1))  # numeric, so the sort below is not lexicographic ('9' > '58')\n",
189"    df['star'] = parts.str[2]\n",
190"    df.drop(0, axis=1, inplace=True)\n",
191"\n",
192"    # Read the constellation names (skipping '#' comment lines) and merge them on the 'constellation' key\n",
193"    lines = []\n",
194"    if name == 'chinese_medieval':\n",
195"        # names available in original language characters\n",
196"        readname = './data/skycultures/'+name+'/constellation_names.zh_CN.fab'\n",
197"    else:\n",
198"        readname = './data/skycultures/'+name+'/constellation_names.eng.fab'\n",
199"    with open(readname, encoding='utf-8') as f:\n",
200"        for line in f:\n",
201"            if line[0] != '#':\n",
202"                lines.append(str(line))\n",
203"    newlines = []\n",
204"    for line in lines:\n",
205"        line = line.replace(\"\\n\",\"\").replace(\"_\",\"\").split('\"')  # fields are delimited by double quotes\n",
206"        line[0] = line[0].strip()\n",
207"        if len(line[0]) != 0:  # skip rows without a leading key (e.g. blank lines)\n",
208"            if len(line[1]) == 0:  # first quoted name is empty: fall back to the second quoted field\n",
209"                line[1] = line[3]\n",
210"            newlines.append(line)\n",
211"\n",
212"    cs = [x[0] for x in newlines]\n",
213"    ns = [x[1] for x in newlines]\n",
214"    df_names = pd.DataFrame.from_dict({\"constellation\":cs, \"name\":ns})\n",
215"    df = pd.merge(df, df_names, on=\"constellation\", how='left')\n",
216"\n",
217"    # Merge star locations back into original database\n",
218"    df['ra'] = ''\n",
219"    df['dec'] = ''\n",
220"    for index, row in df.iterrows():\n",
221"        star = row['star']\n",
222"        # Manually fix problematic star IDs\n",
223"        # SOURCE: Hipparcos catalog. Mapped to HD ID identifier. \n",
224"        # http://tdc-www.harvard.edu/catalogs/hipparcos.html\n",
225"        # https://www.cosmos.esa.int/web/hipparcos/search-facility\n",
226"        if star == '78727':\n",
227"            temp = hip_df[hip_df['hd']== 144069.]\n",
228"        else:\n",
229"            temp = hip_df[hip_df['hip']==float(star)]\n",
230"        if len(temp) != 1:\n",
231"            print(star, 'has issues in merging with HIP catalogue')\n",
232"            display(temp.head())\n",
233"        df.at[index, 'ra'] = temp['ra'].tolist()[0]  # first match is used; raises IndexError if there is none\n",
234"        df.at[index, 'dec'] = temp['dec'].tolist()[0]\n",
235"\n",
236"    df['culture'] = name\n",
237"    df.drop('constellation', axis=1, inplace=True)\n",
238"    df.sort_values(by='num_pairs', ascending=False, inplace=True)\n",
239"    df.to_csv('./data/processed/skycultures/asterism_names/'+name+'_names.csv', index=False)\n",
240"\n",
241"display(df.head())"
242]
243},
244{
245"cell_type": "code",
246"execution_count": 5,
247"metadata": {},
248"outputs": [
249{
250"name": "stdout",
251"output_type": "stream",
252"text": [
253"Total of 1270 named asterisms from 34 cultures\n"
254]
255},
256{
257"data": {
258"text/html": [
259"<div>\n",
260"<style scoped>\n",
261" .dataframe tbody tr th:only-of-type {\n",
262" vertical-align: middle;\n",
263" }\n",
264"\n",
265" .dataframe tbody tr th {\n",
266" vertical-align: top;\n",
267" }\n",
268"\n",
269" .dataframe thead th {\n",
270" text-align: right;\n",
271" }\n",
272"</style>\n",
273"<table border=\"1\" class=\"dataframe\">\n",
274" <thead>\n",
275" <tr style=\"text-align: right;\">\n",
276" <th></th>\n",
277" <th>num_pairs</th>\n",
278" <th>star</th>\n",
279" <th>name</th>\n",
280" <th>ra</th>\n",
281" <th>dec</th>\n",
282" <th>culture</th>\n",
283" <th>color</th>\n",
284" </tr>\n",
285" </thead>\n",
286" <tbody>\n",
287" <tr>\n",
288" <th>3</th>\n",
289" <td>58</td>\n",
290" <td>17772</td>\n",
291" <td>Yai</td>\n",
292" <td>3.805021</td>\n",
293" <td>50.736767</td>\n",
294" <td>tukano</td>\n",
295" <td>#ae7b3a</td>\n",
296" </tr>\n",
297" <tr>\n",
298" <th>5</th>\n",
299" <td>43</td>\n",
300" <td>78820</td>\n",
301" <td>Aña</td>\n",
302" <td>16.090620</td>\n",
303" <td>-19.805453</td>\n",
304" <td>tukano</td>\n",
305" <td>#ae7b3a</td>\n",
306" </tr>\n",
307" <tr>\n",
308" <th>2</th>\n",
309" <td>43</td>\n",
310" <td>78820</td>\n",
311" <td>Ema (Guira-nhandu)</td>\n",
312" <td>16.090620</td>\n",
313" <td>-19.805453</td>\n",
314" <td>tupi</td>\n",
315" <td>#d23958</td>\n",
316" </tr>\n",
317" <tr>\n",
318" <th>3</th>\n",
319" <td>42</td>\n",
320" <td>60718</td>\n",
321" <td>Veado</td>\n",
322" <td>12.443311</td>\n",
323" <td>-63.099092</td>\n",
324" <td>tupi</td>\n",
325" <td>#d23958</td>\n",
326" </tr>\n",
327" <tr>\n",
328" <th>92</th>\n",
329" <td>39</td>\n",
330" <td>1170</td>\n",
331" <td>URimGuun</td>\n",
332" <td>0.244005</td>\n",
333" <td>-18.932866</td>\n",
334" <td>korean</td>\n",
335" <td>#2f8cae</td>\n",
336" </tr>\n",
337" </tbody>\n",
338"</table>\n",
339"</div>"
340],
341"text/plain": [
342" num_pairs star name ra dec culture \\\n",
343"3 58 17772 Yai 3.805021 50.736767 tukano \n",
344"5 43 78820 Aña 16.090620 -19.805453 tukano \n",
345"2 43 78820 Ema (Guira-nhandu) 16.090620 -19.805453 tupi \n",
346"3 42 60718 Veado 12.443311 -63.099092 tupi \n",
347"92 39 1170 URimGuun 0.244005 -18.932866 korean \n",
348"\n",
349" color \n",
350"3 #ae7b3a \n",
351"5 #ae7b3a \n",
352"2 #d23958 \n",
353"3 #d23958 \n",
354"92 #2f8cae "
355]
356},
357"metadata": {},
358"output_type": "display_data"
359}
360],
361"source": [
362"'''\n",
363"Map each named asterism to the correct color using the colormap.csv file. \n",
364"Combine data from all cultures into one dataframe, sorted by num_pairs (largest first). \n",
365"'''\n",
366"\n",
367"names = sorted(glob.glob('./data/processed/skycultures/asterism_names/*_names.csv'))\n",
368"colors = pd.read_csv(\"./data/processed/colormap.csv\")\n",
369"\n",
370"frames = []  # one dataframe per culture; concatenated once after the loop instead of re-copying on every pass\n",
371"for name in names:\n",
372"    temp = pd.read_csv(name, encoding=\"utf-8\")\n",
373"    culture = temp['culture'].unique()\n",
374"    assert len(culture) == 1, name + ' must contain exactly one culture'\n",
375"    culture = culture[0]\n",
376"    color = colors[colors['culture'] == culture]['color'].tolist()\n",
377"    assert len(color) == 1, str(culture) + ' must have exactly one entry in colormap.csv'\n",
378"    color = color[0]\n",
379"    temp['color'] = color\n",
380"    frames.append(temp)\n",
381"\n",
382"namedf = pd.concat(frames).sort_values(by='num_pairs', ascending=False)\n",
383"print('Total of', len(namedf), 'named asterisms from', len(namedf.culture.unique()), 'cultures')\n",
384"display(namedf.head())\n",
385"namedf.to_csv('./data/processed/named_asterisms_to_plot.csv', index=False)"
386]
387},
388{
389"cell_type": "code",
390"execution_count": null,
391"metadata": {},
392"outputs": [],
393"source": []
394}
395],
396"metadata": {
397"kernelspec": {
398"display_name": "Python 3",
399"language": "python",
400"name": "python3"
401},
402"language_info": {
403"codemirror_mode": {
404"name": "ipython",
405"version": 3
406},
407"file_extension": ".py",
408"mimetype": "text/x-python",
409"name": "python",
410"nbconvert_exporter": "python",
411"pygments_lexer": "ipython3",
412"version": "3.7.1"
413}
414},
415"nbformat": 4,
416"nbformat_minor": 2
417}
418