atlas_of_moon_geology
/
1_process_moon_data.ipynb
244 строки · 8.0 Кб
1{
2"cells": [
3{
4"cell_type": "code",
5"execution_count": 1,
6"metadata": {},
7"outputs": [],
8"source": [
9"import pandas as pd\n",
10"import shapefile as shp\n",
11"import cartopy.io.shapereader as shpreader"
12]
13},
14{
15"cell_type": "code",
16"execution_count": 2,
17"metadata": {},
18"outputs": [
19{
20"name": "stdout",
21"output_type": "stream",
22"text": [
23"shapefile 2.0.1\n",
24"watermark 1.8.1\n",
25"cartopy 0.17.0\n",
26"pandas 0.23.4\n",
27"ELEANOR LUTZ 2019-08-24 \n",
28"\n",
29"CPython 3.7.1\n",
30"IPython 7.2.0\n",
31"\n",
32"compiler : MSC v.1900 64 bit (AMD64)\n",
33"system : Windows\n",
34"release : 10\n",
35"machine : AMD64\n",
36"processor : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel\n",
37"CPU cores : 12\n",
38"interpreter: 64bit\n"
39]
40}
41],
42"source": [
43"# Watermark is not required for this code, but is included for information. \n",
44"import watermark\n",
45"%load_ext watermark\n",
46"%watermark -a \"ELEANOR LUTZ\" -d -v -iv -m"
47]
48},
49{
50"cell_type": "code",
51"execution_count": 3,
52"metadata": {},
53"outputs": [],
54"source": [
# Dataset labels (as assigned by the USGS), one entry per dataset to process.
# Each label is later used to build that dataset's folder and shapefile name.
datasets = [
    'I-0703',
    'I-0948',
    'I-1034',
    'I-1047',
    'I-1062',
    'I-1162',
]
57]
58},
59{
60"cell_type": "code",
61"execution_count": 4,
62"metadata": {},
63"outputs": [
64{
65"data": {
66"text/html": [
67"<div>\n",
68"<style scoped>\n",
69" .dataframe tbody tr th:only-of-type {\n",
70" vertical-align: middle;\n",
71" }\n",
72"\n",
73" .dataframe tbody tr th {\n",
74" vertical-align: top;\n",
75" }\n",
76"\n",
77" .dataframe thead th {\n",
78" text-align: right;\n",
79" }\n",
80"</style>\n",
81"<table border=\"1\" class=\"dataframe\">\n",
82" <thead>\n",
83" <tr style=\"text-align: right;\">\n",
84" <th></th>\n",
85" <th>UnitSymbol</th>\n",
86" <th>UnitName</th>\n",
87" <th>MajorGroup</th>\n",
88" <th>UnitDescri</th>\n",
89" <th>Data_source</th>\n",
90" <th>Duplicated</th>\n",
91" </tr>\n",
92" </thead>\n",
93" <tbody>\n",
94" <tr>\n",
95" <th>86</th>\n",
96" <td>cf</td>\n",
97" <td></td>\n",
98" <td></td>\n",
99" <td></td>\n",
100" <td>I-1062</td>\n",
101" <td>False</td>\n",
102" </tr>\n",
103" <tr>\n",
104" <th>133</th>\n",
105" <td>INbl</td>\n",
106" <td>Undivided Lineated Basin Material</td>\n",
107" <td>Basin Materials</td>\n",
108" <td>Undivided Lineated Basin Material, Imbrian and...</td>\n",
109" <td>I-1062</td>\n",
110" <td>False</td>\n",
111" </tr>\n",
112" <tr>\n",
113" <th>23</th>\n",
114" <td>Ia</td>\n",
115" <td>Alpes Formation</td>\n",
116" <td>Basin Materials</td>\n",
117" <td>Alpes Formation, Imbrian System</td>\n",
118" <td>I-1062</td>\n",
119" <td>False</td>\n",
120" </tr>\n",
121" <tr>\n",
122" <th>70</th>\n",
123" <td>If</td>\n",
124" <td>Fra Mauro Formation</td>\n",
125" <td>Basin Materials</td>\n",
126" <td>Fra Mauro Formation, Imbrian System</td>\n",
127" <td>I-1062</td>\n",
128" <td>True</td>\n",
129" </tr>\n",
130" <tr>\n",
131" <th>822</th>\n",
132" <td>Iic</td>\n",
133" <td>Material of Imbrium-Basin Secondary-Impact Cra...</td>\n",
134" <td>Basin Materials</td>\n",
135" <td>Material of Imbrium-Basin Secondary-Impact Cra...</td>\n",
136" <td>I-1162</td>\n",
137" <td>False</td>\n",
138" </tr>\n",
139" </tbody>\n",
140"</table>\n",
141"</div>"
142],
143"text/plain": [
144" UnitSymbol UnitName \\\n",
145"86 cf \n",
146"133 INbl Undivided Lineated Basin Material \n",
147"23 Ia Alpes Formation \n",
148"70 If Fra Mauro Formation \n",
149"822 Iic Material of Imbrium-Basin Secondary-Impact Cra... \n",
150"\n",
151" MajorGroup UnitDescri \\\n",
152"86 \n",
153"133 Basin Materials Undivided Lineated Basin Material, Imbrian and... \n",
154"23 Basin Materials Alpes Formation, Imbrian System \n",
155"70 Basin Materials Fra Mauro Formation, Imbrian System \n",
156"822 Basin Materials Material of Imbrium-Basin Secondary-Impact Cra... \n",
157"\n",
158" Data_source Duplicated \n",
159"86 I-1062 False \n",
160"133 I-1062 False \n",
161"23 I-1062 False \n",
162"70 I-1062 True \n",
163"822 I-1162 False "
164]
165},
166"metadata": {},
167"output_type": "display_data"
168},
169{
170"name": "stdout",
171"output_type": "stream",
172"text": [
173"87 duplicated symbols\n",
174"107 unit symbols that are only found in one dataset\n"
175]
176}
177],
178"source": [
# Create a master dataframe containing all geologic unit descriptions and symbols.
# This dataframe is used to assign colors for each geologic unit.

# Root folder that holds one sub-folder per dataset label. This is a
# machine-specific absolute path: point it at your local copy of the data.
SHAPEFILE_ROOT = "A:/gitrepos/geology_atlas_of_space/data/Lunar_Geologic_GIS_Renovation_March2013"

# Attribute columns collected from every shapefile record, in output order.
UNIT_COLUMNS = ['UnitSymbol', 'UnitName', 'MajorGroup', 'UnitDescri']


def _unit_name(attributes):
    """Return the unit name, falling back to 'UnitName_1' when 'UnitName' is absent."""
    try:
        return attributes['UnitName']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates.
        return attributes['UnitName_1']


def _read_unit_table(source):
    """Read one dataset's geology shapefile into a dataframe of its units.

    Parameters
    ----------
    source : str
        Dataset label, e.g. 'I-1062'. The shapefile name uses the same label
        with '-' replaced by '_'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct UnitSymbol (first occurrence kept) with the
        columns in UNIT_COLUMNS plus 'Data_source' set to `source`.
    """
    fname = "{0}/{1}/Shapefiles/{2}_Geology.shp".format(
        SHAPEFILE_ROOT, source, source.replace('-', '_'))
    # Named `reader` so the `shapefile as shp` module alias is not overwritten.
    reader = shpreader.Reader(fname)
    # Only the attribute records are needed; geometries are never used here.
    rows = [
        [attrs['UnitSymbol'], _unit_name(attrs), attrs['MajorGroup'], attrs['UnitDescri']]
        for attrs in (record.attributes for record in reader.records())
    ]
    return (
        pd.DataFrame(rows, columns=UNIT_COLUMNS)
        .drop_duplicates(subset='UnitSymbol', keep='first')
        .assign(Data_source=source)
    )


# Read every dataset first, then concatenate once instead of growing a frame in the loop.
totaldf = pd.concat([_read_unit_table(s) for s in datasets])

# True for every repeat of a symbol that already appeared in an earlier dataset
# (the first occurrence of each symbol stays False).
totaldf['Duplicated'] = totaldf.duplicated(subset='UnitSymbol')
totaldf = (
    totaldf
    .sort_values(by=['MajorGroup', 'UnitSymbol'])
    .dropna(subset=['UnitSymbol'])
)
# Drop records whose unit symbol is an empty string.
totaldf = totaldf[totaldf['UnitSymbol'].str.len() > 0]
totaldf.to_csv('./data/unit_descriptions_from_files.csv', index=False)

display(totaldf.head())
print(int(totaldf['Duplicated'].sum()), 'duplicated symbols')
# Rows with Duplicated == False are the first occurrence of each symbol,
# i.e. the number of distinct symbols -- not symbols unique to one dataset.
print(int((~totaldf['Duplicated']).sum()), 'unique unit symbols across all datasets')
# keep=False flags every occurrence of a repeated symbol, so the complement
# is exactly the set of symbols that appear in a single dataset.
found_once = ~totaldf.duplicated(subset='UnitSymbol', keep=False)
print(int(found_once.sum()), 'unit symbols that are only found in one dataset')
213]
214},
215{
216"cell_type": "code",
217"execution_count": null,
218"metadata": {},
219"outputs": [],
220"source": []
221}
222],
223"metadata": {
224"kernelspec": {
225"display_name": "Python 3",
226"language": "python",
227"name": "python3"
228},
229"language_info": {
230"codemirror_mode": {
231"name": "ipython",
232"version": 3
233},
234"file_extension": ".py",
235"mimetype": "text/x-python",
236"name": "python",
237"nbconvert_exporter": "python",
238"pygments_lexer": "ipython3",
239"version": "3.7.1"
240}
241},
242"nbformat": 4,
243"nbformat_minor": 2
244}
245