ClickHouse
362 строки · 10.2 Кб
1#include <array>
2#include <cmath>
3#include <cassert>
4#include <Functions/GeoHash.h>
5
6
7namespace DB
8{
9
10namespace
11{
12
13const char geohash_base32_encode_lookup_table[32] = {
14'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
15'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm',
16'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
17'y', 'z',
18};
19
20// TODO: this could be halved by excluding 128-255 range.
21const uint8_t geohash_base32_decode_lookup_table[256] = {
220xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
230xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
240xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
250, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
260xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
270xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
280xFF, 0xFF, 10, 11, 12, 13, 14, 15, 16, 0xFF, 17, 18, 0xFF, 19, 20, 0xFF,
2921, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
300xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
310xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
320xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
330xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
340xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
350xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
360xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
370xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
38};
39
40const size_t BITS_PER_SYMBOL = 5;
41const size_t MAX_PRECISION = 12;
42const size_t MAX_BITS = (MAX_PRECISION * BITS_PER_SYMBOL * 3) / 2;
43const Float64 LON_MIN = -180;
44const Float64 LON_MAX = 180;
45const Float64 LAT_MIN = -90;
46const Float64 LAT_MAX = 90;
47
48using Encoded = std::array<uint8_t, MAX_BITS>;
49
50enum CoordType
51{
52LATITUDE,
53LONGITUDE,
54};
55
56inline uint8_t singleCoordBitsPrecision(uint8_t precision, CoordType type)
57{
58// Single coordinate occupies only half of the total bits.
59const uint8_t bits = (precision * BITS_PER_SYMBOL) / 2;
60if (precision & 0x1 && type == LONGITUDE)
61{
62return bits + 1;
63}
64
65return bits;
66}
67
68inline Encoded encodeCoordinate(Float64 coord, Float64 min, Float64 max, uint8_t bits)
69{
70Encoded result;
71result.fill(0);
72
73for (size_t i = 0; i < bits; ++i)
74{
75const Float64 mid = (max + min) / 2;
76if (coord >= mid)
77{
78result[i] = 1;
79min = mid;
80}
81else
82{
83result[i] = 0;
84max = mid;
85}
86}
87
88return result;
89}
90
91inline Float64 decodeCoordinate(const Encoded & coord, Float64 min, Float64 max, uint8_t bits)
92{
93Float64 mid = (max + min) / 2;
94for (size_t i = 0; i < bits; ++i)
95{
96const auto c = coord[i];
97if (c == 1)
98{
99min = mid;
100}
101else
102{
103max = mid;
104}
105
106mid = (max + min) / 2;
107}
108
109return mid;
110}
111
112inline Encoded merge(const Encoded & encodedLon, const Encoded & encodedLat, uint8_t precision)
113{
114Encoded result;
115result.fill(0);
116
117const auto bits = (precision * BITS_PER_SYMBOL) / 2;
118assert(bits < 255);
119uint8_t i = 0;
120for (; i < bits; ++i)
121{
122result[i * 2 + 0] = encodedLon[i];
123result[i * 2 + 1] = encodedLat[i];
124}
125// in case of even precision, add last bit of longitude
126if (precision & 0x1)
127{
128result[i * 2] = encodedLon[i];
129}
130
131return result;
132}
133
134inline std::tuple<Encoded, Encoded> split(const Encoded & combined, uint8_t precision)
135{
136Encoded lat, lon;
137lat.fill(0);
138lon.fill(0);
139
140size_t i = 0;
141for (; i < precision * BITS_PER_SYMBOL - 1; i += 2)
142{
143// longitude is even bits
144lon[i / 2] = combined[i];
145lat[i / 2] = combined[i + 1];
146}
147// precision is even, read the last bit as lat.
148if (precision & 0x1)
149{
150lon[i / 2] = combined[precision * BITS_PER_SYMBOL - 1];
151}
152
153return std::tie(lon, lat);
154}
155
156inline void base32Encode(const Encoded & binary, uint8_t precision, char * out)
157{
158extern const char geohash_base32_encode_lookup_table[32];
159
160for (size_t i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL)
161{
162uint8_t v = binary[i];
163v <<= 1;
164v |= binary[i + 1];
165v <<= 1;
166v |= binary[i + 2];
167v <<= 1;
168v |= binary[i + 3];
169v <<= 1;
170v |= binary[i + 4];
171
172assert(v < 32);
173
174*out = geohash_base32_encode_lookup_table[v];
175++out;
176}
177}
178
179inline Encoded base32Decode(const char * encoded_string, size_t encoded_length)
180{
181extern const uint8_t geohash_base32_decode_lookup_table[256];
182
183Encoded result;
184
185for (size_t i = 0; i < encoded_length; ++i)
186{
187const uint8_t c = static_cast<uint8_t>(encoded_string[i]);
188const uint8_t decoded = geohash_base32_decode_lookup_table[c] & 0x1F;
189result[i * 5 + 4] = (decoded >> 0) & 0x01;
190result[i * 5 + 3] = (decoded >> 1) & 0x01;
191result[i * 5 + 2] = (decoded >> 2) & 0x01;
192result[i * 5 + 1] = (decoded >> 3) & 0x01;
193result[i * 5 + 0] = (decoded >> 4) & 0x01;
194}
195
196return result;
197}
198
199inline Float64 getMaxSpan(CoordType type)
200{
201if (type == LONGITUDE)
202{
203return LON_MAX - LON_MIN;
204}
205
206return LAT_MAX - LAT_MIN;
207}
208
209inline Float64 getSpan(uint8_t precision, CoordType type)
210{
211const auto bits = singleCoordBitsPrecision(precision, type);
212// since every bit of precision divides span by 2, divide max span by 2^bits.
213return ldexp(getMaxSpan(type), -1 * bits);
214}
215
216inline uint8_t geohashPrecision(uint8_t precision)
217{
218if (precision == 0 || precision > MAX_PRECISION)
219precision = MAX_PRECISION;
220
221return precision;
222}
223
224inline size_t geohashEncodeImpl(Float64 longitude, Float64 latitude, uint8_t precision, char * out)
225{
226const Encoded combined = merge(
227encodeCoordinate(longitude, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE)),
228encodeCoordinate(latitude, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE)),
229precision);
230
231base32Encode(combined, precision, out);
232
233return precision;
234}
235
236}
237
238
239size_t geohashEncode(Float64 longitude, Float64 latitude, uint8_t precision, char * out)
240{
241precision = geohashPrecision(precision);
242return geohashEncodeImpl(longitude, latitude, precision, out);
243}
244
245void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * longitude, Float64 * latitude)
246{
247const uint8_t precision = std::min(encoded_len, static_cast<size_t>(MAX_PRECISION));
248if (precision == 0)
249{
250// Empty string is converted to (0, 0)
251*longitude = 0;
252*latitude = 0;
253return;
254}
255
256Encoded lat_encoded, lon_encoded;
257std::tie(lon_encoded, lat_encoded) = split(base32Decode(encoded_string, precision), precision);
258
259*longitude = decodeCoordinate(lon_encoded, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE));
260*latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE));
261}
262
263GeohashesInBoxPreparedArgs geohashesInBoxPrepare(
264Float64 longitude_min,
265Float64 latitude_min,
266Float64 longitude_max,
267Float64 latitude_max,
268uint8_t precision)
269{
270precision = geohashPrecision(precision);
271
272if (longitude_max < longitude_min
273|| latitude_max < latitude_min
274|| std::isnan(longitude_min)
275|| std::isnan(longitude_max)
276|| std::isnan(latitude_min)
277|| std::isnan(latitude_max))
278{
279return {};
280}
281
282auto saturate = [](Float64 & value, Float64 min, Float64 max)
283{
284if (value < min)
285value = min;
286else if (value > max)
287value = max;
288};
289
290saturate(longitude_min, LON_MIN, LON_MAX);
291saturate(longitude_max, LON_MIN, LON_MAX);
292saturate(latitude_min, LAT_MIN, LAT_MAX);
293saturate(latitude_max, LAT_MIN, LAT_MAX);
294
295Float64 lon_step = getSpan(precision, LONGITUDE);
296Float64 lat_step = getSpan(precision, LATITUDE);
297
298/// Align max to the right (or up) border of geohash grid cell to ensure that cell is in result.
299Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
300Float64 lat_min = floor(latitude_min / lat_step) * lat_step;
301Float64 lon_max = ceil(longitude_max / lon_step) * lon_step;
302Float64 lat_max = ceil(latitude_max / lat_step) * lat_step;
303
304UInt32 lon_items = static_cast<UInt32>((lon_max - lon_min) / lon_step);
305UInt32 lat_items = static_cast<UInt32>((lat_max - lat_min) / lat_step);
306
307return GeohashesInBoxPreparedArgs
308{
309std::max<UInt64>(1, static_cast<UInt64>(lon_items) * lat_items),
310lon_items,
311lat_items,
312lon_min,
313lat_min,
314lon_step,
315lat_step,
316precision
317};
318}
319
320UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
321{
322if (args.precision == 0
323|| args.precision > MAX_PRECISION
324|| args.longitude_step <= 0
325|| args.latitude_step <= 0)
326{
327return 0;
328}
329
330UInt64 items = 0;
331for (size_t i = 0; i < args.longitude_items; ++i)
332{
333for (size_t j = 0; j < args.latitude_items; ++j)
334{
335size_t length = geohashEncodeImpl(
336args.longitude_min + args.longitude_step * i,
337args.latitude_min + args.latitude_step * j,
338args.precision,
339out);
340
341out += length;
342*out = '\0';
343++out;
344
345++items;
346}
347}
348
349if (items == 0)
350{
351size_t length = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out);
352out += length;
353*out = '\0';
354++out;
355
356++items;
357}
358
359return items;
360}
361
362}
363