efl

Форк
0
311 строк · 10.8 Кб
1
// Copyright (c) 2011 Google, Inc.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a copy
4
// of this software and associated documentation files (the "Software"), to deal
5
// in the Software without restriction, including without limitation the rights
6
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
// copies of the Software, and to permit persons to whom the Software is
8
// furnished to do so, subject to the following conditions:
9
//
10
// The above copyright notice and this permission notice shall be included in
11
// all copies or substantial portions of the Software.
12
//
13
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
// THE SOFTWARE.
20
//
21
// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
22
//
23
// This file provides CityHash64() and related functions.
24
//
25
// It's probably possible to create even faster hash functions by
26
// writing a program that systematically explores some of the space of
27
// possible hash functions, by using SIMD instructions, or by
28
// compromising on hash quality.
29

30
#pragma GCC visibility push(default)
31

32
#include "city.h"
33

34
#include <algorithm>
35

36
using namespace std;
37

38
#define UNALIGNED_LOAD64(p) (*(const uint64*)(p))
39
#define UNALIGNED_LOAD32(p) (*(const uint32*)(p))
40

41
#if !defined(LIKELY)
42
#if defined(__GNUC__)
43
#define LIKELY(x) (__builtin_expect(!!(x), 1))
44
#else
45
#define LIKELY(x) (x)
46
#endif
47
#endif
48

49
// Some primes between 2^63 and 2^64 for various uses.
50
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
51
static const uint64 k1 = 0xb492b66fbe98f273ULL;
52
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
53
static const uint64 k3 = 0xc949d7c7509e6557ULL;
54

55
// Bitwise right rotate.  Normally this will compile to a single
56
// instruction, especially if the shift is a manifest constant.
57
static uint64 Rotate(uint64 val, int shift) {
58
  // Avoid shifting by 64: doing so yields an undefined result.
59
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
60
}
61

62
// Equivalent to Rotate(), but requires the second arg to be non-zero.
63
// On x86-64, and probably others, it's possible for this to compile
64
// to a single instruction if both args are already in registers.
65
static uint64 RotateByAtLeast1(uint64 val, int shift) {
66
  return (val >> shift) | (val << (64 - shift));
67
}
68

69
static uint64 ShiftMix(uint64 val) {
70
  return val ^ (val >> 47);
71
}
72

73
static uint64 HashLen16(uint64 u, uint64 v) {
74
  return Hash128to64(uint128(u, v));
75
}
76

77
static uint64 HashLen0to16(const char *s, size_t len) {
78
  if (len > 8) {
79
    uint64 a = UNALIGNED_LOAD64(s);
80
    uint64 b = UNALIGNED_LOAD64(s + len - 8);
81
    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
82
  }
83
  if (len >= 4) {
84
    uint64 a = UNALIGNED_LOAD32(s);
85
    return HashLen16(len + (a << 3), UNALIGNED_LOAD32(s + len - 4));
86
  }
87
  if (len > 0) {
88
    uint8 a = s[0];
89
    uint8 b = s[len >> 1];
90
    uint8 c = s[len - 1];
91
    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
92
    uint32 z = len + (static_cast<uint32>(c) << 2);
93
    return ShiftMix(y * k2 ^ z * k3) * k2;
94
  }
95
  return k2;
96
}
97

98
// This probably works well for 16-byte strings as well, but it may be overkill
99
// in that case.
100
static uint64 HashLen17to32(const char *s, size_t len) {
101
  uint64 a = UNALIGNED_LOAD64(s) * k1;
102
  uint64 b = UNALIGNED_LOAD64(s + 8);
103
  uint64 c = UNALIGNED_LOAD64(s + len - 8) * k2;
104
  uint64 d = UNALIGNED_LOAD64(s + len - 16) * k0;
105
  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
106
                   a + Rotate(b ^ k3, 20) - c + len);
107
}
108

109
// Return a 16-byte hash for 48 bytes.  Quick and dirty.
110
// Callers do best to use "random-looking" values for a and b.
111
static pair<uint64, uint64> WeakHashLen32WithSeeds(
112
    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
113
  a += w;
114
  b = Rotate(b + a + z, 21);
115
  uint64 c = a;
116
  a += x;
117
  a += y;
118
  b += Rotate(a, 44);
119
  return make_pair(a + z, b + c);
120
}
121

122
// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
123
static pair<uint64, uint64> WeakHashLen32WithSeeds(
124
    const char* s, uint64 a, uint64 b) {
125
  return WeakHashLen32WithSeeds(UNALIGNED_LOAD64(s),
126
                                UNALIGNED_LOAD64(s + 8),
127
                                UNALIGNED_LOAD64(s + 16),
128
                                UNALIGNED_LOAD64(s + 24),
129
                                a,
130
                                b);
131
}
132

133
// Return an 8-byte hash for 33 to 64 bytes.
134
static uint64 HashLen33to64(const char *s, size_t len) {
135
  uint64 z = UNALIGNED_LOAD64(s + 24);
136
  uint64 a = UNALIGNED_LOAD64(s) + (len + UNALIGNED_LOAD64(s + len - 16)) * k0;
137
  uint64 b = Rotate(a + z, 52);
138
  uint64 c = Rotate(a, 37);
139
  a += UNALIGNED_LOAD64(s + 8);
140
  c += Rotate(a, 7);
141
  a += UNALIGNED_LOAD64(s + 16);
142
  uint64 vf = a + z;
143
  uint64 vs = b + Rotate(a, 31) + c;
144
  a = UNALIGNED_LOAD64(s + 16) + UNALIGNED_LOAD64(s + len - 32);
145
  z = UNALIGNED_LOAD64(s + len - 8);
146
  b = Rotate(a + z, 52);
147
  c = Rotate(a, 37);
148
  a += UNALIGNED_LOAD64(s + len - 24);
149
  c += Rotate(a, 7);
150
  a += UNALIGNED_LOAD64(s + len - 16);
151
  uint64 wf = a + z;
152
  uint64 ws = b + Rotate(a, 31) + c;
153
  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
154
  return ShiftMix(r * k0 + vs) * k2;
155
}
156

157
uint64 CityHash64(const char *s, size_t len) {
158
  if (len <= 32) {
159
    if (len <= 16) {
160
      return HashLen0to16(s, len);
161
    } else {
162
      return HashLen17to32(s, len);
163
    }
164
  } else if (len <= 64) {
165
    return HashLen33to64(s, len);
166
  }
167

168
  // For strings over 64 bytes we hash the end first, and then as we
169
  // loop we keep 56 bytes of state: v, w, x, y, and z.
170
  uint64 x = UNALIGNED_LOAD64(s);
171
  uint64 y = UNALIGNED_LOAD64(s + len - 16) ^ k1;
172
  uint64 z = UNALIGNED_LOAD64(s + len - 56) ^ k0;
173
  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y);
174
  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0);
175
  z += ShiftMix(v.second) * k1;
176
  x = Rotate(z + x, 39) * k1;
177
  y = Rotate(y, 33) * k1;
178

179
  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
180
  len = (len - 1) & ~static_cast<size_t>(63);
181
  do {
182
    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
183
    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
184
    x ^= w.second;
185
    y ^= v.first;
186
    z = Rotate(z ^ w.first, 33);
187
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
188
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
189
    std::swap(z, x);
190
    s += 64;
191
    len -= 64;
192
  } while (len != 0);
193
  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
194
                   HashLen16(v.second, w.second) + x);
195
}
196

197
uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
198
  return CityHash64WithSeeds(s, len, k2, seed);
199
}
200

201
uint64 CityHash64WithSeeds(const char *s, size_t len,
202
                           uint64 seed0, uint64 seed1) {
203
  return HashLen16(CityHash64(s, len) - seed0, seed1);
204
}
205

206
// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
207
// of any length representable in ssize_t.  Based on City and Murmur.
208
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
209
  uint64 a = Uint128Low64(seed);
210
  uint64 b = Uint128High64(seed);
211
  uint64 c = 0;
212
  uint64 d = 0;
213
  ssize_t l = len - 16;
214
  if (l <= 0) {  // len <= 16
215
    c = b * k1 + HashLen0to16(s, len);
216
    d = Rotate(a + (len >= 8 ? UNALIGNED_LOAD64(s) : c), 32);
217
  } else {  // len > 16
218
    c = HashLen16(UNALIGNED_LOAD64(s + len - 8) + k1, a);
219
    d = HashLen16(b + len, c + UNALIGNED_LOAD64(s + len - 16));
220
    a += d;
221
    do {
222
      a ^= ShiftMix(UNALIGNED_LOAD64(s) * k1) * k1;
223
      a *= k1;
224
      b ^= a;
225
      c ^= ShiftMix(UNALIGNED_LOAD64(s + 8) * k1) * k1;
226
      c *= k1;
227
      d ^= c;
228
      s += 16;
229
      l -= 16;
230
    } while (l > 0);
231
  }
232
  a = HashLen16(a, c);
233
  b = HashLen16(d, b);
234
  return uint128(a ^ b, HashLen16(b, a));
235
}
236

237
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
238
  if (len < 128) {
239
    return CityMurmur(s, len, seed);
240
  }
241

242
  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
243
  // v, w, x, y, and z.
244
  pair<uint64, uint64> v, w;
245
  uint64 x = Uint128Low64(seed);
246
  uint64 y = Uint128High64(seed);
247
  uint64 z = len * k1;
248
  v.first = Rotate(y ^ k1, 49) * k1 + UNALIGNED_LOAD64(s);
249
  v.second = Rotate(v.first, 42) * k1 + UNALIGNED_LOAD64(s + 8);
250
  w.first = Rotate(y + z, 35) * k1 + x;
251
  w.second = Rotate(x + UNALIGNED_LOAD64(s + 88), 53) * k1;
252

253
  // This is the same inner loop as CityHash64(), manually unrolled.
254
  do {
255
    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
256
    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
257
    x ^= w.second;
258
    y ^= v.first;
259
    z = Rotate(z ^ w.first, 33);
260
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
261
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
262
    std::swap(z, x);
263
    s += 64;
264
    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
265
    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
266
    x ^= w.second;
267
    y ^= v.first;
268
    z = Rotate(z ^ w.first, 33);
269
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
270
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
271
    std::swap(z, x);
272
    s += 64;
273
    len -= 128;
274
  } while (LIKELY(len >= 128));
275
  y += Rotate(w.first, 37) * k0 + z;
276
  x += Rotate(v.first + z, 49) * k0;
277
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
278
  for (size_t tail_done = 0; tail_done < len; ) {
279
    tail_done += 32;
280
    y = Rotate(y - x, 42) * k0 + v.second;
281
    w.first += UNALIGNED_LOAD64(s + len - tail_done + 16);
282
    x = Rotate(x, 49) * k0 + w.first;
283
    w.first += v.first;
284
    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);
285
  }
286
  // At this point our 48 bytes of state should contain more than
287
  // enough information for a strong 128-bit hash.  We use two
288
  // different 48-byte-to-8-byte hashes to get a 16-byte final result.
289
  x = HashLen16(x, v.first);
290
  y = HashLen16(y, w.first);
291
  return uint128(HashLen16(x + v.second, w.second) + y,
292
                 HashLen16(x + w.second, y + v.second));
293
}
294

295
uint128 CityHash128(const char *s, size_t len) {
296
  if (len >= 16) {
297
    return CityHash128WithSeed(s + 16,
298
                               len - 16,
299
                               uint128(UNALIGNED_LOAD64(s) ^ k3,
300
                                       UNALIGNED_LOAD64(s + 8)));
301
  } else if (len >= 8) {
302
    return CityHash128WithSeed(NULL,
303
                               0,
304
                               uint128(UNALIGNED_LOAD64(s) ^ (len * k0),
305
                                       UNALIGNED_LOAD64(s + len - 8) ^ k1));
306
  } else {
307
    return CityHash128WithSeed(s, len, uint128(k0, k1));
308
  }
309
}
310

311
#pragma GCC visibility pop
312

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.