ClickHouse
440 строк · 18.0 Кб
1#include <Columns/ColumnFixedString.h>2#include <Columns/ColumnString.h>3#include <Columns/ColumnsNumber.h>4#include <DataTypes/DataTypeFixedString.h>5#include <Functions/FunctionFactory.h>6#include <Functions/FunctionHelpers.h>7#include <Functions/IFunction.h>8#include <base/IPv4andIPv6.h>9
10#include "config.h"11
12#if USE_BLAKE313# include <llvm/Support/BLAKE3.h>14#endif15
16#if USE_SSL17# include <openssl/md4.h>18# include <openssl/md5.h>19# include <openssl/sha.h>20# if USE_BORINGSSL21# include <openssl/digest.h>22# else23# include <openssl/evp.h>24# endif25#endif26
/// Instantiating only the functions that require FunctionStringHashFixedString in a separate file
/// to better parallelize the build procedure and avoid MSan build failure
/// due to excessive resource consumption.
30
31namespace DB32{
33namespace ErrorCodes34{
35extern const int ILLEGAL_COLUMN;36extern const int ILLEGAL_TYPE_OF_ARGUMENT;37}
38
39
40#if USE_SSL41
42struct MD4Impl43{
44static constexpr auto name = "MD4";45enum46{47length = MD4_DIGEST_LENGTH48};49
50static void apply(const char * begin, const size_t size, unsigned char * out_char_data)51{52MD4_CTX ctx;53MD4_Init(&ctx);54MD4_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);55MD4_Final(out_char_data, &ctx);56}57};58
59struct MD5Impl60{
61static constexpr auto name = "MD5";62enum63{64length = MD5_DIGEST_LENGTH65};66
67static void apply(const char * begin, const size_t size, unsigned char * out_char_data)68{69MD5_CTX ctx;70MD5_Init(&ctx);71MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);72MD5_Final(out_char_data, &ctx);73}74};75
76struct SHA1Impl77{
78static constexpr auto name = "SHA1";79enum80{81length = SHA_DIGEST_LENGTH82};83
84static void apply(const char * begin, const size_t size, unsigned char * out_char_data)85{86SHA_CTX ctx;87SHA1_Init(&ctx);88SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);89SHA1_Final(out_char_data, &ctx);90}91};92
93struct SHA224Impl94{
95static constexpr auto name = "SHA224";96enum97{98length = SHA224_DIGEST_LENGTH99};100
101static void apply(const char * begin, const size_t size, unsigned char * out_char_data)102{103SHA256_CTX ctx;104SHA224_Init(&ctx);105SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);106SHA224_Final(out_char_data, &ctx);107}108};109
110struct SHA256Impl111{
112static constexpr auto name = "SHA256";113enum114{115length = SHA256_DIGEST_LENGTH116};117
118static void apply(const char * begin, const size_t size, unsigned char * out_char_data)119{120SHA256_CTX ctx;121SHA256_Init(&ctx);122SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);123SHA256_Final(out_char_data, &ctx);124}125};126
127struct SHA384Impl128{
129static constexpr auto name = "SHA384";130enum131{132length = SHA384_DIGEST_LENGTH133};134
135static void apply(const char * begin, const size_t size, unsigned char * out_char_data)136{137SHA512_CTX ctx;138SHA384_Init(&ctx);139SHA384_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);140SHA384_Final(out_char_data, &ctx);141}142};143
144struct SHA512Impl145{
146static constexpr auto name = "SHA512";147enum148{149length = 64150};151
152static void apply(const char * begin, const size_t size, unsigned char * out_char_data)153{154SHA512_CTX ctx;155SHA512_Init(&ctx);156SHA512_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);157SHA512_Final(out_char_data, &ctx);158}159};160
161struct SHA512Impl256162{
163static constexpr auto name = "SHA512_256";164enum165{166length = 32167};168
169static void apply(const char * begin, const size_t size, unsigned char * out_char_data)170{171/// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default172/// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init,173/// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available174/// in the current version of OpenSSL that we use which necessitates the use of the EVP interface.175auto * md_ctx = EVP_MD_CTX_create();176EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/);177EVP_DigestUpdate(md_ctx, begin, size);178EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/);179EVP_MD_CTX_destroy(md_ctx);180}181};182#endif183
#if USE_BLAKE3
/// Hash policy for FunctionStringHashFixedString: BLAKE3 computed with llvm::BLAKE3.
struct ImplBLAKE3
{
    static constexpr auto name = "BLAKE3";
    enum
    {
        length = 32
    };

    /// Hashes `size` bytes starting at `begin` and stores the digest in `out_char_data`.
    /// The output buffer is reinterpreted as an array of LLVM_BLAKE3_OUT_LEN bytes, so it must be at least that large.
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
    {
        static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length);

        using Digest = std::array<uint8_t, LLVM_BLAKE3_OUT_LEN>;
        auto * digest = reinterpret_cast<Digest *>(out_char_data);

        llvm::BLAKE3 state;
        /// Empty input is not fed to the hasher at all; only the finalization step runs.
        if (size != 0)
            state.update(llvm::StringRef(begin, size));
        state.final(*digest);
    }
};

#endif

207template <typename Impl>208class FunctionStringHashFixedString : public IFunction209{
210public:211static constexpr auto name = Impl::name;212static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStringHashFixedString>(); }213
214String getName() const override { return name; }215
216size_t getNumberOfArguments() const override { return 1; }217
218DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override219{220if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0]))221throw Exception(222ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());223
224return std::make_shared<DataTypeFixedString>(Impl::length);225}226
227bool useDefaultImplementationForConstants() const override { return true; }228
229bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }230
231ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override232{233if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))234{235auto col_to = ColumnFixedString::create(Impl::length);236
237const typename ColumnString::Chars & data = col_from->getChars();238const typename ColumnString::Offsets & offsets = col_from->getOffsets();239auto & chars_to = col_to->getChars();240const auto size = offsets.size();241chars_to.resize(size * Impl::length);242
243ColumnString::Offset current_offset = 0;244for (size_t i = 0; i < size; ++i)245{246Impl::apply(247reinterpret_cast<const char *>(&data[current_offset]),248offsets[i] - current_offset - 1,249reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));250
251current_offset = offsets[i];252}253
254return col_to;255}256else if (const ColumnFixedString * col_from_fix = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))257{258auto col_to = ColumnFixedString::create(Impl::length);259const typename ColumnFixedString::Chars & data = col_from_fix->getChars();260const auto size = col_from_fix->size();261auto & chars_to = col_to->getChars();262const auto length = col_from_fix->getN();263chars_to.resize(size * Impl::length);264for (size_t i = 0; i < size; ++i)265{266Impl::apply(267reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));268}269return col_to;270}271else if (const ColumnIPv6 * col_from_ip = checkAndGetColumn<ColumnIPv6>(arguments[0].column.get()))272{273auto col_to = ColumnFixedString::create(Impl::length);274const typename ColumnIPv6::Container & data = col_from_ip->getData();275const auto size = col_from_ip->size();276auto & chars_to = col_to->getChars();277const auto length = sizeof(IPv6::UnderlyingType);278chars_to.resize(size * Impl::length);279for (size_t i = 0; i < size; ++i)280{281Impl::apply(282reinterpret_cast<const char *>(&data[i]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));283}284return col_to;285}286else287throw Exception(288ErrorCodes::ILLEGAL_COLUMN,289"Illegal column {} of first argument of function {}",290arguments[0].column->getName(),291getName());292}293};294
295#if USE_SSL || USE_BLAKE3296REGISTER_FUNCTION(HashFixedStrings)297{
298# if USE_SSL299using FunctionMD4 = FunctionStringHashFixedString<MD4Impl>;300using FunctionMD5 = FunctionStringHashFixedString<MD5Impl>;301using FunctionSHA1 = FunctionStringHashFixedString<SHA1Impl>;302using FunctionSHA224 = FunctionStringHashFixedString<SHA224Impl>;303using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;304using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;305using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;306using FunctionSHA512_256 = FunctionStringHashFixedString<SHA512Impl256>;307
308factory.registerFunction<FunctionMD4>(FunctionDocumentation{309.description = R"(Calculates the MD4 hash of the given string.)",310.syntax = "SELECT MD4(s);",311.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},312.returned_value313= "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",314.examples315= {{"",316"SELECT HEX(MD4('abc'));",317R"(318┌─hex(MD4('abc'))──────────────────┐
319│ A448017AAF21D8525FC10AE87AA6729D │
320└──────────────────────────────────┘
321)"}}});322factory.registerFunction<FunctionMD5>(FunctionDocumentation{323.description = R"(Calculates the MD5 hash of the given string.)",324.syntax = "SELECT MD5(s);",325.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},326.returned_value327= "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).",328.examples329= {{"",330"SELECT HEX(MD5('abc'));",331R"(332┌─hex(MD5('abc'))──────────────────┐
333│ 900150983CD24FB0D6963F7D28E17F72 │
334└──────────────────────────────────┘
335)"}}});336factory.registerFunction<FunctionSHA1>(FunctionDocumentation{337.description = R"(Calculates the SHA1 hash of the given string.)",338.syntax = "SELECT SHA1(s);",339.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},340.returned_value341= "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",342.examples343= {{"",344"SELECT HEX(SHA1('abc'));",345R"(346┌─hex(SHA1('abc'))─────────────────────────┐
347│ A9993E364706816ABA3E25717850C26C9CD0D89D │
348└──────────────────────────────────────────┘
349)"}}});350factory.registerFunction<FunctionSHA224>(FunctionDocumentation{351.description = R"(Calculates the SHA224 hash of the given string.)",352.syntax = "SELECT SHA224(s);",353.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},354.returned_value355= "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",356.examples357= {{"",358"SELECT HEX(SHA224('abc'));",359R"(360┌─hex(SHA224('abc'))───────────────────────────────────────┐
361│ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │
362└──────────────────────────────────────────────────────────┘
363)"}}});364factory.registerFunction<FunctionSHA256>(FunctionDocumentation{365.description = R"(Calculates the SHA256 hash of the given string.)",366.syntax = "SELECT SHA256(s);",367.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},368.returned_value369= "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",370.examples371= {{"",372"SELECT HEX(SHA256('abc'));",373R"(374┌─hex(SHA256('abc'))───────────────────────────────────────────────┐
375│ BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │
376└──────────────────────────────────────────────────────────────────┘
377)"}}});378factory.registerFunction<FunctionSHA384>(FunctionDocumentation{379.description = R"(Calculates the SHA384 hash of the given string.)",380.syntax = "SELECT SHA384(s);",381.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},382.returned_value383= "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",384.examples385= {{"",386"SELECT HEX(SHA384('abc'));",387R"(388┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐
389│ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │
390└──────────────────────────────────────────────────────────────────────────────────────────────────┘
391)"}}});392factory.registerFunction<FunctionSHA512>(FunctionDocumentation{393.description = R"(Calculates the SHA512 hash of the given string.)",394.syntax = "SELECT SHA512(s);",395.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},396.returned_value397= "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",398.examples399= {{"",400"SELECT HEX(SHA512('abc'));",401R"(402┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
403│ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │
404└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
405)"}}});406factory.registerFunction<FunctionSHA512_256>(FunctionDocumentation{407.description = R"(Calculates the SHA512_256 hash of the given string.)",408.syntax = "SELECT SHA512_256(s);",409.arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}},410.returned_value411= "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).",412.examples413= {{"",414"SELECT HEX(SHA512_256('abc'));",415R"(416┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐
417│ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │
418└──────────────────────────────────────────────────────────────────┘
419)"}}});420
421
422# endif423
424# if USE_BLAKE3425using FunctionBLAKE3 = FunctionStringHashFixedString<ImplBLAKE3>;426factory.registerFunction<FunctionBLAKE3>(427FunctionDocumentation{428.description = R"(429Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString.
430This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library.
431The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256.
432It returns a BLAKE3 hash as a byte array with type FixedString(32).
433)",434.examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}},435.categories{"Hash"}},436FunctionFactory::CaseSensitive);437# endif438}
439#endif440}
441