ClickHouse
103 строки · 2.4 Кб
1#include <Functions/FunctionTokens.h>2#include <Functions/FunctionFactory.h>3#include <Common/StringUtils/StringUtils.h>4
5
6namespace DB7{
8
/** Functions that split strings into an array of strings or vice versa.
  *
  * splitByWhitespace(s[, max_substrings]) - split the string by ASCII whitespace characters.
  * Runs of consecutive whitespace are skipped as a whole, so no empty substrings are produced.
  */
13namespace
14{
15
16using Pos = const char *;17
18class SplitByWhitespaceImpl19{
20private:21Pos pos;22Pos end;23std::optional<size_t> max_splits;24size_t splits;25bool max_substrings_includes_remaining_string;26
27public:28static constexpr auto name = "splitByWhitespace";29
30static bool isVariadic() { return true; }31static size_t getNumberOfArguments() { return 0; }32
33static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; }34
35static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)36{37checkArgumentsWithOptionalMaxSubstrings(func, arguments);38}39
40static constexpr auto strings_argument_position = 0uz;41
42void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)43{44max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;45max_splits = extractMaxSplits(arguments, 1);46}47
48/// Called for each next string.49void set(Pos pos_, Pos end_)50{51pos = pos_;52end = end_;53splits = 0;54}55
56/// Get the next token, if any, or return false.57bool get(Pos & token_begin, Pos & token_end)58{59/// Skip garbage60while (pos < end && isWhitespaceASCII(*pos))61++pos;62
63if (pos == end)64return false;65
66token_begin = pos;67
68if (max_splits)69{70if (max_substrings_includes_remaining_string)71{72if (splits == *max_splits - 1)73{74token_end = end;75pos = end;76return true;77}78}79else80if (splits == *max_splits)81return false;82}83
84while (pos < end && !isWhitespaceASCII(*pos))85++pos;86
87token_end = pos;88splits++;89
90return true;91}92};93
94using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;95
96}
97
98REGISTER_FUNCTION(SplitByWhitespace)99{
100factory.registerFunction<FunctionSplitByWhitespace>();101}
102
103}
104