atlas_of_moon_geology

Форк
0
/
1_process_moon_data.ipynb 
244 строки · 8.0 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import pandas as pd\n",
10
    "import shapefile as shp\n",
11
    "import cartopy.io.shapereader as shpreader"
12
   ]
13
  },
14
  {
15
   "cell_type": "code",
16
   "execution_count": 2,
17
   "metadata": {},
18
   "outputs": [
19
    {
20
     "name": "stdout",
21
     "output_type": "stream",
22
     "text": [
23
      "shapefile 2.0.1\n",
24
      "watermark 1.8.1\n",
25
      "cartopy   0.17.0\n",
26
      "pandas    0.23.4\n",
27
      "ELEANOR LUTZ 2019-08-24 \n",
28
      "\n",
29
      "CPython 3.7.1\n",
30
      "IPython 7.2.0\n",
31
      "\n",
32
      "compiler   : MSC v.1900 64 bit (AMD64)\n",
33
      "system     : Windows\n",
34
      "release    : 10\n",
35
      "machine    : AMD64\n",
36
      "processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
37
      "CPU cores  : 12\n",
38
      "interpreter: 64bit\n"
39
     ]
40
    }
41
   ],
42
   "source": [
43
    "# Watermark is not required for this code, but is included for information. \n",
44
    "import watermark\n",
45
    "%load_ext watermark\n",
46
    "%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
47
   ]
48
  },
49
  {
50
   "cell_type": "code",
51
   "execution_count": 3,
52
   "metadata": {},
53
   "outputs": [],
54
   "source": [
55
    "# Labels the USGS uses to identify each of the datasets processed in this notebook\n",
    "datasets = [\n",
    "    'I-0703',\n",
    "    'I-0948',\n",
    "    'I-1034',\n",
    "    'I-1047',\n",
    "    'I-1062',\n",
    "    'I-1162',\n",
    "]"
57
   ]
58
  },
59
  {
60
   "cell_type": "code",
61
   "execution_count": 4,
62
   "metadata": {},
63
   "outputs": [
64
    {
65
     "data": {
66
      "text/html": [
67
       "<div>\n",
68
       "<style scoped>\n",
69
       "    .dataframe tbody tr th:only-of-type {\n",
70
       "        vertical-align: middle;\n",
71
       "    }\n",
72
       "\n",
73
       "    .dataframe tbody tr th {\n",
74
       "        vertical-align: top;\n",
75
       "    }\n",
76
       "\n",
77
       "    .dataframe thead th {\n",
78
       "        text-align: right;\n",
79
       "    }\n",
80
       "</style>\n",
81
       "<table border=\"1\" class=\"dataframe\">\n",
82
       "  <thead>\n",
83
       "    <tr style=\"text-align: right;\">\n",
84
       "      <th></th>\n",
85
       "      <th>UnitSymbol</th>\n",
86
       "      <th>UnitName</th>\n",
87
       "      <th>MajorGroup</th>\n",
88
       "      <th>UnitDescri</th>\n",
89
       "      <th>Data_source</th>\n",
90
       "      <th>Duplicated</th>\n",
91
       "    </tr>\n",
92
       "  </thead>\n",
93
       "  <tbody>\n",
94
       "    <tr>\n",
95
       "      <th>86</th>\n",
96
       "      <td>cf</td>\n",
97
       "      <td></td>\n",
98
       "      <td></td>\n",
99
       "      <td></td>\n",
100
       "      <td>I-1062</td>\n",
101
       "      <td>False</td>\n",
102
       "    </tr>\n",
103
       "    <tr>\n",
104
       "      <th>133</th>\n",
105
       "      <td>INbl</td>\n",
106
       "      <td>Undivided Lineated Basin Material</td>\n",
107
       "      <td>Basin Materials</td>\n",
108
       "      <td>Undivided Lineated Basin Material, Imbrian and...</td>\n",
109
       "      <td>I-1062</td>\n",
110
       "      <td>False</td>\n",
111
       "    </tr>\n",
112
       "    <tr>\n",
113
       "      <th>23</th>\n",
114
       "      <td>Ia</td>\n",
115
       "      <td>Alpes Formation</td>\n",
116
       "      <td>Basin Materials</td>\n",
117
       "      <td>Alpes Formation, Imbrian System</td>\n",
118
       "      <td>I-1062</td>\n",
119
       "      <td>False</td>\n",
120
       "    </tr>\n",
121
       "    <tr>\n",
122
       "      <th>70</th>\n",
123
       "      <td>If</td>\n",
124
       "      <td>Fra Mauro Formation</td>\n",
125
       "      <td>Basin Materials</td>\n",
126
       "      <td>Fra Mauro Formation, Imbrian System</td>\n",
127
       "      <td>I-1062</td>\n",
128
       "      <td>True</td>\n",
129
       "    </tr>\n",
130
       "    <tr>\n",
131
       "      <th>822</th>\n",
132
       "      <td>Iic</td>\n",
133
       "      <td>Material of Imbrium-Basin Secondary-Impact Cra...</td>\n",
134
       "      <td>Basin Materials</td>\n",
135
       "      <td>Material of Imbrium-Basin Secondary-Impact Cra...</td>\n",
136
       "      <td>I-1162</td>\n",
137
       "      <td>False</td>\n",
138
       "    </tr>\n",
139
       "  </tbody>\n",
140
       "</table>\n",
141
       "</div>"
142
      ],
143
      "text/plain": [
144
       "    UnitSymbol                                           UnitName  \\\n",
145
       "86          cf                                                      \n",
146
       "133       INbl                  Undivided Lineated Basin Material   \n",
147
       "23          Ia                                    Alpes Formation   \n",
148
       "70          If                                Fra Mauro Formation   \n",
149
       "822        Iic  Material of Imbrium-Basin Secondary-Impact Cra...   \n",
150
       "\n",
151
       "          MajorGroup                                         UnitDescri  \\\n",
152
       "86                                                                        \n",
153
       "133  Basin Materials  Undivided Lineated Basin Material, Imbrian and...   \n",
154
       "23   Basin Materials                    Alpes Formation, Imbrian System   \n",
155
       "70   Basin Materials                Fra Mauro Formation, Imbrian System   \n",
156
       "822  Basin Materials  Material of Imbrium-Basin Secondary-Impact Cra...   \n",
157
       "\n",
158
       "    Data_source  Duplicated  \n",
159
       "86       I-1062       False  \n",
160
       "133      I-1062       False  \n",
161
       "23       I-1062       False  \n",
162
       "70       I-1062        True  \n",
163
       "822      I-1162       False  "
164
      ]
165
     },
166
     "metadata": {},
167
     "output_type": "display_data"
168
    },
169
    {
170
     "name": "stdout",
171
     "output_type": "stream",
172
     "text": [
173
      "87 duplicated symbols\n",
174
      "107 unit symbols that are only found in one dataset\n"
175
     ]
176
    }
177
   ],
178
   "source": [
179
    "# Create a master dataframe containing all geologic unit descriptions and symbols.\n",
    "# This dataframe is used to assign colors for each geologic unit.\n",
    "\n",
    "# Folder holding the per-dataset shapefile directories. This is the one place\n",
    "# to edit when the data lives somewhere else on your machine.\n",
    "gis_root = \"A:/gitrepos/geology_atlas_of_space/data/Lunar_Geologic_GIS_Renovation_March2013/\"\n",
    "unit_columns = ['UnitSymbol', 'UnitName', 'MajorGroup', 'UnitDescri']\n",
    "\n",
    "\n",
    "def _unit_row(attributes):\n",
    "    \"\"\"Return (symbol, name, major group, description) from one record's attribute dict.\"\"\"\n",
    "    # Fall back to 'UnitName_1' when a shapefile has no 'UnitName' field.\n",
    "    name_key = 'UnitName' if 'UnitName' in attributes else 'UnitName_1'\n",
    "    return (attributes['UnitSymbol'], attributes[name_key],\n",
    "            attributes['MajorGroup'], attributes['UnitDescri'])\n",
    "\n",
    "\n",
    "frames = []\n",
    "for s in datasets:\n",
    "    s2 = s.replace('-', '_')\n",
    "    fname = gis_root + s + \"/Shapefiles/\" + s2 + \"_Geology.shp\"\n",
    "    # Named `reader` so the `shp` alias of the shapefile module is not overwritten.\n",
    "    reader = shpreader.Reader(fname)\n",
    "    # Only record attributes are used, so geometries are not iterated.\n",
    "    rows = [_unit_row(record.attributes) for record in reader.records()]\n",
    "    # Keep one row per unit symbol within this dataset and tag it with its source.\n",
    "    tempdf = (pd.DataFrame(rows, columns=unit_columns)\n",
    "              .drop_duplicates(subset='UnitSymbol', keep='first')\n",
    "              .assign(Data_source=s))\n",
    "    frames.append(tempdf)\n",
    "\n",
    "# Concatenate once rather than growing the dataframe inside the loop.\n",
    "totaldf = pd.concat(frames)\n",
    "# 'Duplicated' is True for every occurrence of a symbol after its first one.\n",
    "totaldf['Duplicated'] = totaldf.duplicated(subset='UnitSymbol')\n",
    "totaldf = totaldf.sort_values(by=['MajorGroup', 'UnitSymbol'])\n",
    "# Discard rows whose unit symbol is missing or an empty string.\n",
    "totaldf = totaldf.dropna(subset=['UnitSymbol'])\n",
    "totaldf = totaldf[totaldf['UnitSymbol'].str.len() > 0]\n",
    "totaldf.to_csv('./data/unit_descriptions_from_files.csv', index=False)\n",
    "\n",
    "display(totaldf.head())\n",
    "# NOTE: 'Duplicated' == False marks the first occurrence of each symbol, so the\n",
    "# second count is the number of distinct symbols, including ones that also\n",
    "# appear in later datasets.\n",
    "print(len(totaldf[totaldf['Duplicated'] == True]), 'duplicated symbols')\n",
    "print(len(totaldf[totaldf['Duplicated'] == False]), 'unit symbols that are only found in one dataset')"
213
   ]
214
  },
215
  {
216
   "cell_type": "code",
217
   "execution_count": null,
218
   "metadata": {},
219
   "outputs": [],
220
   "source": []
221
  }
222
 ],
223
 "metadata": {
224
  "kernelspec": {
225
   "display_name": "Python 3",
226
   "language": "python",
227
   "name": "python3"
228
  },
229
  "language_info": {
230
   "codemirror_mode": {
231
    "name": "ipython",
232
    "version": 3
233
   },
234
   "file_extension": ".py",
235
   "mimetype": "text/x-python",
236
   "name": "python",
237
   "nbconvert_exporter": "python",
238
   "pygments_lexer": "ipython3",
239
   "version": "3.7.1"
240
  }
241
 },
242
 "nbformat": 4,
243
 "nbformat_minor": 2
244
}
245

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.