ClickHouse

Форк
0
/
FunctionsStringHashFixedString.cpp 
440 строк · 18.0 Кб
1
#include <Columns/ColumnFixedString.h>
2
#include <Columns/ColumnString.h>
3
#include <Columns/ColumnsNumber.h>
4
#include <DataTypes/DataTypeFixedString.h>
5
#include <Functions/FunctionFactory.h>
6
#include <Functions/FunctionHelpers.h>
7
#include <Functions/IFunction.h>
8
#include <base/IPv4andIPv6.h>
9

10
#include "config.h"
11

12
#if USE_BLAKE3
13
#    include <llvm/Support/BLAKE3.h>
14
#endif
15

16
#if USE_SSL
17
#    include <openssl/md4.h>
18
#    include <openssl/md5.h>
19
#    include <openssl/sha.h>
20
#    if USE_BORINGSSL
21
#        include <openssl/digest.h>
22
#    else
23
#        include <openssl/evp.h>
24
#    endif
25
#endif
26

27
/// Instantiating only the functions that require FunctionStringHashFixedString in a separate file
28
/// to better parallelize the build procedure and avoid MSan build failure
29
/// due to excessive resource consumption.
30

31
namespace DB
32
{
33
namespace ErrorCodes
34
{
35
extern const int ILLEGAL_COLUMN;
36
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
37
}
38

39

40
#if USE_SSL
41

42
struct MD4Impl
43
{
44
    static constexpr auto name = "MD4";
45
    enum
46
    {
47
        length = MD4_DIGEST_LENGTH
48
    };
49

50
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
51
    {
52
        MD4_CTX ctx;
53
        MD4_Init(&ctx);
54
        MD4_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
55
        MD4_Final(out_char_data, &ctx);
56
    }
57
};
58

59
struct MD5Impl
60
{
61
    static constexpr auto name = "MD5";
62
    enum
63
    {
64
        length = MD5_DIGEST_LENGTH
65
    };
66

67
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
68
    {
69
        MD5_CTX ctx;
70
        MD5_Init(&ctx);
71
        MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
72
        MD5_Final(out_char_data, &ctx);
73
    }
74
};
75

76
struct SHA1Impl
77
{
78
    static constexpr auto name = "SHA1";
79
    enum
80
    {
81
        length = SHA_DIGEST_LENGTH
82
    };
83

84
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
85
    {
86
        SHA_CTX ctx;
87
        SHA1_Init(&ctx);
88
        SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
89
        SHA1_Final(out_char_data, &ctx);
90
    }
91
};
92

93
struct SHA224Impl
94
{
95
    static constexpr auto name = "SHA224";
96
    enum
97
    {
98
        length = SHA224_DIGEST_LENGTH
99
    };
100

101
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
102
    {
103
        SHA256_CTX ctx;
104
        SHA224_Init(&ctx);
105
        SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
106
        SHA224_Final(out_char_data, &ctx);
107
    }
108
};
109

110
struct SHA256Impl
111
{
112
    static constexpr auto name = "SHA256";
113
    enum
114
    {
115
        length = SHA256_DIGEST_LENGTH
116
    };
117

118
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
119
    {
120
        SHA256_CTX ctx;
121
        SHA256_Init(&ctx);
122
        SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
123
        SHA256_Final(out_char_data, &ctx);
124
    }
125
};
126

127
struct SHA384Impl
128
{
129
    static constexpr auto name = "SHA384";
130
    enum
131
    {
132
        length = SHA384_DIGEST_LENGTH
133
    };
134

135
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
136
    {
137
        SHA512_CTX ctx;
138
        SHA384_Init(&ctx);
139
        SHA384_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
140
        SHA384_Final(out_char_data, &ctx);
141
    }
142
};
143

144
struct SHA512Impl
145
{
146
    static constexpr auto name = "SHA512";
147
    enum
148
    {
149
        length = 64
150
    };
151

152
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
153
    {
154
        SHA512_CTX ctx;
155
        SHA512_Init(&ctx);
156
        SHA512_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
157
        SHA512_Final(out_char_data, &ctx);
158
    }
159
};
160

161
struct SHA512Impl256
162
{
163
    static constexpr auto name = "SHA512_256";
164
    enum
165
    {
166
        length = 32
167
    };
168

169
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
170
    {
171
        /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default
172
        /// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init,
173
        /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available
174
        /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface.
175
        auto * md_ctx = EVP_MD_CTX_create();
176
        EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/);
177
        EVP_DigestUpdate(md_ctx, begin, size);
178
        EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/);
179
        EVP_MD_CTX_destroy(md_ctx);
180
    }
181
};
182
#endif
183

184
#if USE_BLAKE3
185
struct ImplBLAKE3
186
{
187
    static constexpr auto name = "BLAKE3";
188
    enum
189
    {
190
        length = 32
191
    };
192

193
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
194
    {
195
        static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length);
196
        auto & result = *reinterpret_cast<std::array<uint8_t, LLVM_BLAKE3_OUT_LEN> *>(out_char_data);
197

198
        llvm::BLAKE3 hasher;
199
        if (size > 0)
200
            hasher.update(llvm::StringRef(begin, size));
201
        hasher.final(result);
202
    }
203
};
204

205
#endif
206

207
template <typename Impl>
208
class FunctionStringHashFixedString : public IFunction
209
{
210
public:
211
    static constexpr auto name = Impl::name;
212
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStringHashFixedString>(); }
213

214
    String getName() const override { return name; }
215

216
    size_t getNumberOfArguments() const override { return 1; }
217

218
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
219
    {
220
        if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0]))
221
            throw Exception(
222
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());
223

224
        return std::make_shared<DataTypeFixedString>(Impl::length);
225
    }
226

227
    bool useDefaultImplementationForConstants() const override { return true; }
228

229
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
230

231
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
232
    {
233
        if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
234
        {
235
            auto col_to = ColumnFixedString::create(Impl::length);
236

237
            const typename ColumnString::Chars & data = col_from->getChars();
238
            const typename ColumnString::Offsets & offsets = col_from->getOffsets();
239
            auto & chars_to = col_to->getChars();
240
            const auto size = offsets.size();
241
            chars_to.resize(size * Impl::length);
242

243
            ColumnString::Offset current_offset = 0;
244
            for (size_t i = 0; i < size; ++i)
245
            {
246
                Impl::apply(
247
                    reinterpret_cast<const char *>(&data[current_offset]),
248
                    offsets[i] - current_offset - 1,
249
                    reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
250

251
                current_offset = offsets[i];
252
            }
253

254
            return col_to;
255
        }
256
        else if (const ColumnFixedString * col_from_fix = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))
257
        {
258
            auto col_to = ColumnFixedString::create(Impl::length);
259
            const typename ColumnFixedString::Chars & data = col_from_fix->getChars();
260
            const auto size = col_from_fix->size();
261
            auto & chars_to = col_to->getChars();
262
            const auto length = col_from_fix->getN();
263
            chars_to.resize(size * Impl::length);
264
            for (size_t i = 0; i < size; ++i)
265
            {
266
                Impl::apply(
267
                    reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
268
            }
269
            return col_to;
270
        }
271
        else if (const ColumnIPv6 * col_from_ip = checkAndGetColumn<ColumnIPv6>(arguments[0].column.get()))
272
        {
273
            auto col_to = ColumnFixedString::create(Impl::length);
274
            const typename ColumnIPv6::Container & data = col_from_ip->getData();
275
            const auto size = col_from_ip->size();
276
            auto & chars_to = col_to->getChars();
277
            const auto length = sizeof(IPv6::UnderlyingType);
278
            chars_to.resize(size * Impl::length);
279
            for (size_t i = 0; i < size; ++i)
280
            {
281
                Impl::apply(
282
                    reinterpret_cast<const char *>(&data[i]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
283
            }
284
            return col_to;
285
        }
286
        else
287
            throw Exception(
288
                ErrorCodes::ILLEGAL_COLUMN,
289
                "Illegal column {} of first argument of function {}",
290
                arguments[0].column->getName(),
291
                getName());
292
    }
293
};
294

295
#if USE_SSL || USE_BLAKE3
296
/// Registers every hash function built on FunctionStringHashFixedString together with its documentation.
/// The OpenSSL-backed functions are available only when USE_SSL is set, BLAKE3 only when USE_BLAKE3 is set.
REGISTER_FUNCTION(HashFixedStrings)
{
#    if USE_SSL
    using FunctionMD4 = FunctionStringHashFixedString<MD4Impl>;
    using FunctionMD5 = FunctionStringHashFixedString<MD5Impl>;
    using FunctionSHA1 = FunctionStringHashFixedString<SHA1Impl>;
    using FunctionSHA224 = FunctionStringHashFixedString<SHA224Impl>;
    using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
    using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
    using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
    using FunctionSHA512_256 = FunctionStringHashFixedString<SHA512Impl256>;

    /// The FixedString lengths in the `returned_value` texts are the digest sizes in bytes
    /// (half the width of the hex strings shown in the examples).
    factory.registerFunction<FunctionMD4>(FunctionDocumentation{
        .description = R"(Calculates the MD4 hash of the given string.)",
        .syntax = "SELECT MD4(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(MD4('abc'));",
            R"(
┌─hex(MD4('abc'))──────────────────┐
│ A448017AAF21D8525FC10AE87AA6729D │
└──────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionMD5>(FunctionDocumentation{
        .description = R"(Calculates the MD5 hash of the given string.)",
        .syntax = "SELECT MD5(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(MD5('abc'));",
            R"(
┌─hex(MD5('abc'))──────────────────┐
│ 900150983CD24FB0D6963F7D28E17F72 │
└──────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionSHA1>(FunctionDocumentation{
        .description = R"(Calculates the SHA1 hash of the given string.)",
        .syntax = "SELECT SHA1(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The SHA1 hash of the given input string returned as a [FixedString(20)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(SHA1('abc'));",
            R"(
┌─hex(SHA1('abc'))─────────────────────────┐
│ A9993E364706816ABA3E25717850C26C9CD0D89D │
└──────────────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionSHA224>(FunctionDocumentation{
        .description = R"(Calculates the SHA224 hash of the given string.)",
        .syntax = "SELECT SHA224(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The SHA224 hash of the given input string returned as a [FixedString(28)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(SHA224('abc'));",
            R"(
┌─hex(SHA224('abc'))───────────────────────────────────────┐
│ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │
└──────────────────────────────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionSHA256>(FunctionDocumentation{
        .description = R"(Calculates the SHA256 hash of the given string.)",
        .syntax = "SELECT SHA256(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The SHA256 hash of the given input string returned as a [FixedString(32)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(SHA256('abc'));",
            R"(
┌─hex(SHA256('abc'))───────────────────────────────────────────────┐
│ BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │
└──────────────────────────────────────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionSHA384>(FunctionDocumentation{
        .description = R"(Calculates the SHA384 hash of the given string.)",
        .syntax = "SELECT SHA384(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The SHA384 hash of the given input string returned as a [FixedString(48)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(SHA384('abc'));",
            R"(
┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐
│ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │
└──────────────────────────────────────────────────────────────────────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionSHA512>(FunctionDocumentation{
        .description = R"(Calculates the SHA512 hash of the given string.)",
        .syntax = "SELECT SHA512(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The SHA512 hash of the given input string returned as a [FixedString(64)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(SHA512('abc'));",
            R"(
┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
            )"}}});
    factory.registerFunction<FunctionSHA512_256>(FunctionDocumentation{
        .description = R"(Calculates the SHA512_256 hash of the given string.)",
        .syntax = "SELECT SHA512_256(s);",
        .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},
        .returned_value
        = "The SHA512_256 hash of the given input string returned as a [FixedString(32)](../../sql-reference/data-types/fixedstring.md).",
        .examples
        = {{"",
            "SELECT HEX(SHA512_256('abc'));",
            R"(
┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐
│ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │
└──────────────────────────────────────────────────────────────────┘
            )"}}});

#    endif

#    if USE_BLAKE3
    using FunctionBLAKE3 = FunctionStringHashFixedString<ImplBLAKE3>;
    /// The description names the LLVM implementation because ImplBLAKE3 hashes with llvm::BLAKE3.
    factory.registerFunction<FunctionBLAKE3>(
        FunctionDocumentation{
            .description = R"(
    Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString.
    This cryptographic hash-function is integrated into ClickHouse using the BLAKE3 implementation bundled with LLVM.
    The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256.
    It returns a BLAKE3 hash as a byte array with type FixedString(32).
    )",
            .examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}},
            .categories{"Hash"}},
        FunctionFactory::CaseSensitive);
#    endif
}
439
#endif
440
}
441

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.