ClickHouse

Форк
0
/
GeoHash.cpp 
362 строки · 10.2 Кб
1
#include <array>
2
#include <cmath>
3
#include <cassert>
4
#include <Functions/GeoHash.h>
5

6

7
namespace DB
8
{
9

10
namespace
11
{
12

13
const char geohash_base32_encode_lookup_table[32] = {
14
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
15
    'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm',
16
    'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
17
    'y', 'z',
18
};
19

20
// TODO: this could be halved by excluding 128-255 range.
21
const uint8_t geohash_base32_decode_lookup_table[256] = {
22
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
23
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
25
    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
26
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
27
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
28
    0xFF, 0xFF, 10,   11,   12,   13,   14,   15,   16,   0xFF, 17,   18,   0xFF, 19,   20,   0xFF,
29
    21,   22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
30
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
31
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
32
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
33
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
34
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
35
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
36
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
37
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
38
};
39

40
const size_t BITS_PER_SYMBOL = 5;
41
const size_t MAX_PRECISION = 12;
42
const size_t MAX_BITS = (MAX_PRECISION * BITS_PER_SYMBOL * 3) / 2;
43
const Float64 LON_MIN = -180;
44
const Float64 LON_MAX = 180;
45
const Float64 LAT_MIN = -90;
46
const Float64 LAT_MAX = 90;
47

48
using Encoded = std::array<uint8_t, MAX_BITS>;
49

50
enum CoordType
51
{
52
    LATITUDE,
53
    LONGITUDE,
54
};
55

56
inline uint8_t singleCoordBitsPrecision(uint8_t precision, CoordType type)
57
{
58
    // Single coordinate occupies only half of the total bits.
59
    const uint8_t bits = (precision * BITS_PER_SYMBOL) / 2;
60
    if (precision & 0x1 && type == LONGITUDE)
61
    {
62
        return bits + 1;
63
    }
64

65
    return bits;
66
}
67

68
inline Encoded encodeCoordinate(Float64 coord, Float64 min, Float64 max, uint8_t bits)
69
{
70
    Encoded result;
71
    result.fill(0);
72

73
    for (size_t i = 0; i < bits; ++i)
74
    {
75
        const Float64 mid = (max + min) / 2;
76
        if (coord >= mid)
77
        {
78
            result[i] = 1;
79
            min = mid;
80
        }
81
        else
82
        {
83
            result[i] = 0;
84
            max = mid;
85
        }
86
    }
87

88
    return result;
89
}
90

91
inline Float64 decodeCoordinate(const Encoded & coord, Float64 min, Float64 max, uint8_t bits)
92
{
93
    Float64 mid = (max + min) / 2;
94
    for (size_t i = 0; i < bits; ++i)
95
    {
96
        const auto c = coord[i];
97
        if (c == 1)
98
        {
99
            min = mid;
100
        }
101
        else
102
        {
103
            max = mid;
104
        }
105

106
        mid = (max + min) / 2;
107
    }
108

109
    return mid;
110
}
111

112
inline Encoded merge(const Encoded & encodedLon, const Encoded & encodedLat, uint8_t precision)
113
{
114
    Encoded result;
115
    result.fill(0);
116

117
    const auto bits = (precision * BITS_PER_SYMBOL) / 2;
118
    assert(bits < 255);
119
    uint8_t i = 0;
120
    for (; i < bits; ++i)
121
    {
122
        result[i * 2 + 0] = encodedLon[i];
123
        result[i * 2 + 1] = encodedLat[i];
124
    }
125
    // in case of even precision, add last bit of longitude
126
    if (precision & 0x1)
127
    {
128
        result[i * 2] = encodedLon[i];
129
    }
130

131
    return result;
132
}
133

134
inline std::tuple<Encoded, Encoded> split(const Encoded & combined, uint8_t precision)
135
{
136
    Encoded lat, lon;
137
    lat.fill(0);
138
    lon.fill(0);
139

140
    size_t i = 0;
141
    for (; i < precision * BITS_PER_SYMBOL - 1; i += 2)
142
    {
143
        // longitude is even bits
144
        lon[i / 2] = combined[i];
145
        lat[i / 2] = combined[i + 1];
146
    }
147
    // precision is even, read the last bit as lat.
148
    if (precision & 0x1)
149
    {
150
        lon[i / 2] = combined[precision * BITS_PER_SYMBOL - 1];
151
    }
152

153
    return std::tie(lon, lat);
154
}
155

156
inline void base32Encode(const Encoded & binary, uint8_t precision, char * out)
157
{
158
    extern const char geohash_base32_encode_lookup_table[32];
159

160
    for (size_t i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL)
161
    {
162
        uint8_t v = binary[i];
163
        v <<= 1;
164
        v |= binary[i + 1];
165
        v <<= 1;
166
        v |= binary[i + 2];
167
        v <<= 1;
168
        v |= binary[i + 3];
169
        v <<= 1;
170
        v |= binary[i + 4];
171

172
        assert(v < 32);
173

174
        *out = geohash_base32_encode_lookup_table[v];
175
        ++out;
176
    }
177
}
178

179
inline Encoded base32Decode(const char * encoded_string, size_t encoded_length)
180
{
181
    extern const uint8_t geohash_base32_decode_lookup_table[256];
182

183
    Encoded result;
184

185
    for (size_t i = 0; i < encoded_length; ++i)
186
    {
187
        const uint8_t c = static_cast<uint8_t>(encoded_string[i]);
188
        const uint8_t decoded = geohash_base32_decode_lookup_table[c] & 0x1F;
189
        result[i * 5 + 4] = (decoded >> 0) & 0x01;
190
        result[i * 5 + 3] = (decoded >> 1) & 0x01;
191
        result[i * 5 + 2] = (decoded >> 2) & 0x01;
192
        result[i * 5 + 1] = (decoded >> 3) & 0x01;
193
        result[i * 5 + 0] = (decoded >> 4) & 0x01;
194
    }
195

196
    return result;
197
}
198

199
inline Float64 getMaxSpan(CoordType type)
200
{
201
    if (type == LONGITUDE)
202
    {
203
        return LON_MAX - LON_MIN;
204
    }
205

206
    return LAT_MAX - LAT_MIN;
207
}
208

209
inline Float64 getSpan(uint8_t precision, CoordType type)
210
{
211
    const auto bits = singleCoordBitsPrecision(precision, type);
212
    // since every bit of precision divides span by 2, divide max span by 2^bits.
213
    return ldexp(getMaxSpan(type), -1 * bits);
214
}
215

216
inline uint8_t geohashPrecision(uint8_t precision)
217
{
218
    if (precision == 0 || precision > MAX_PRECISION)
219
        precision = MAX_PRECISION;
220

221
    return precision;
222
}
223

224
inline size_t geohashEncodeImpl(Float64 longitude, Float64 latitude, uint8_t precision, char * out)
225
{
226
    const Encoded combined = merge(
227
                encodeCoordinate(longitude, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE)),
228
                encodeCoordinate(latitude, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE)),
229
                precision);
230

231
    base32Encode(combined, precision, out);
232

233
    return precision;
234
}
235

236
}
237

238

239
size_t geohashEncode(Float64 longitude, Float64 latitude, uint8_t precision, char * out)
240
{
241
    precision = geohashPrecision(precision);
242
    return geohashEncodeImpl(longitude, latitude, precision, out);
243
}
244

245
void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * longitude, Float64 * latitude)
246
{
247
    const uint8_t precision = std::min(encoded_len, static_cast<size_t>(MAX_PRECISION));
248
    if (precision == 0)
249
    {
250
        // Empty string is converted to (0, 0)
251
        *longitude = 0;
252
        *latitude = 0;
253
        return;
254
    }
255

256
    Encoded lat_encoded, lon_encoded;
257
    std::tie(lon_encoded, lat_encoded) = split(base32Decode(encoded_string, precision), precision);
258

259
    *longitude = decodeCoordinate(lon_encoded, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE));
260
    *latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE));
261
}
262

263
GeohashesInBoxPreparedArgs geohashesInBoxPrepare(
264
    Float64 longitude_min,
265
    Float64 latitude_min,
266
    Float64 longitude_max,
267
    Float64 latitude_max,
268
    uint8_t precision)
269
{
270
    precision = geohashPrecision(precision);
271

272
    if (longitude_max < longitude_min
273
        || latitude_max < latitude_min
274
        || std::isnan(longitude_min)
275
        || std::isnan(longitude_max)
276
        || std::isnan(latitude_min)
277
        || std::isnan(latitude_max))
278
    {
279
        return {};
280
    }
281

282
    auto saturate = [](Float64 & value, Float64 min, Float64 max)
283
    {
284
        if (value < min)
285
            value = min;
286
        else if (value > max)
287
            value = max;
288
    };
289

290
    saturate(longitude_min, LON_MIN, LON_MAX);
291
    saturate(longitude_max, LON_MIN, LON_MAX);
292
    saturate(latitude_min, LAT_MIN, LAT_MAX);
293
    saturate(latitude_max, LAT_MIN, LAT_MAX);
294

295
    Float64 lon_step = getSpan(precision, LONGITUDE);
296
    Float64 lat_step = getSpan(precision, LATITUDE);
297

298
    /// Align max to the right (or up) border of geohash grid cell to ensure that cell is in result.
299
    Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
300
    Float64 lat_min = floor(latitude_min / lat_step) * lat_step;
301
    Float64 lon_max = ceil(longitude_max / lon_step) * lon_step;
302
    Float64 lat_max = ceil(latitude_max / lat_step) * lat_step;
303

304
    UInt32 lon_items = static_cast<UInt32>((lon_max - lon_min) / lon_step);
305
    UInt32 lat_items = static_cast<UInt32>((lat_max - lat_min) / lat_step);
306

307
    return GeohashesInBoxPreparedArgs
308
    {
309
        std::max<UInt64>(1, static_cast<UInt64>(lon_items) * lat_items),
310
        lon_items,
311
        lat_items,
312
        lon_min,
313
        lat_min,
314
        lon_step,
315
        lat_step,
316
        precision
317
    };
318
}
319

320
UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
321
{
322
    if (args.precision == 0
323
        || args.precision > MAX_PRECISION
324
        || args.longitude_step <= 0
325
        || args.latitude_step <= 0)
326
    {
327
        return 0;
328
    }
329

330
    UInt64 items = 0;
331
    for (size_t i = 0; i < args.longitude_items; ++i)
332
    {
333
        for (size_t j = 0; j < args.latitude_items; ++j)
334
        {
335
            size_t length = geohashEncodeImpl(
336
                args.longitude_min + args.longitude_step * i,
337
                args.latitude_min + args.latitude_step * j,
338
                args.precision,
339
                out);
340

341
            out += length;
342
            *out = '\0';
343
            ++out;
344

345
            ++items;
346
        }
347
    }
348

349
    if (items == 0)
350
    {
351
        size_t length = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out);
352
        out += length;
353
        *out = '\0';
354
        ++out;
355

356
        ++items;
357
    }
358

359
    return items;
360
}
361

362
}
363

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.