ClickHouse
249 строк · 9.8 Кб
1#include <Columns/ColumnString.h>
2#include <Columns/ColumnStringHelpers.h>
3#include <DataTypes/DataTypeString.h>
4#include <Functions/FunctionFactory.h>
5#include <Functions/FunctionHelpers.h>
6#include <Functions/GatherUtils/Algorithms.h>
7#include <Functions/GatherUtils/Sinks.h>
8#include <Functions/GatherUtils/Sources.h>
9#include <Functions/IFunction.h>
10#include <Functions/formatString.h>
11#include <IO/WriteHelpers.h>
12#include <base/map.h>
13
14
15namespace DB
16{
/// Error codes referenced in this file. The constant is only declared here (`extern`);
/// its definition is not part of this file.
17namespace ErrorCodes
18{
/// Used by the `getReturnTypeImpl` overrides below when too few arguments are passed.
19extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
20}
21
/// Allows GatherUtils names to be used unqualified in the rest of this file.
/// NOTE(review): presumably `concat`, `StringSource`, `ConstSource` and `StringSink` used in
/// `executeBinary` come from this namespace (see the Functions/GatherUtils includes) - confirm.
22using namespace GatherUtils;
23
24namespace
25{
26
27template <typename Name, bool is_injective>
28class ConcatImpl : public IFunction
29{
30public:
31static constexpr auto name = Name::name;
32explicit ConcatImpl(ContextPtr context_) : context(context_) { }
33static FunctionPtr create(ContextPtr context) { return std::make_shared<ConcatImpl>(context); }
34
35String getName() const override { return name; }
36
37bool isVariadic() const override { return true; }
38
39size_t getNumberOfArguments() const override { return 0; }
40
41bool isInjective(const ColumnsWithTypeAndName &) const override { return is_injective; }
42
43bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
44
45bool useDefaultImplementationForConstants() const override { return true; }
46
47DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
48{
49if (arguments.size() < 2)
50throw Exception(
51ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
52"Number of arguments for function {} doesn't match: passed {}, should be at least 2",
53getName(),
54arguments.size());
55
56return std::make_shared<DataTypeString>();
57}
58
59ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
60{
61/// Format function is not proven to be faster for two arguments.
62/// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl.
63/// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%).
64/// For 3 and more arguments FormatStringImpl is much faster (up to 50-60%).
65if (arguments.size() == 2)
66return executeBinary(arguments, input_rows_count);
67return executeFormatImpl(arguments, input_rows_count);
68}
69
70private:
71ContextWeakPtr context;
72
73ColumnPtr executeBinary(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
74{
75const IColumn * c0 = arguments[0].column.get();
76const IColumn * c1 = arguments[1].column.get();
77
78const ColumnString * c0_string = checkAndGetColumn<ColumnString>(c0);
79const ColumnString * c1_string = checkAndGetColumn<ColumnString>(c1);
80const ColumnConst * c0_const_string = checkAndGetColumnConst<ColumnString>(c0);
81const ColumnConst * c1_const_string = checkAndGetColumnConst<ColumnString>(c1);
82
83auto col_res = ColumnString::create();
84
85if (c0_string && c1_string)
86concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
87else if (c0_string && c1_const_string)
88concat(StringSource(*c0_string), ConstSource<StringSource>(*c1_const_string), StringSink(*col_res, c0->size()));
89else if (c0_const_string && c1_string)
90concat(ConstSource<StringSource>(*c0_const_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
91else
92{
93/// Fallback: use generic implementation for not very important cases.
94return executeFormatImpl(arguments, input_rows_count);
95}
96
97return col_res;
98}
99
100ColumnPtr executeFormatImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
101{
102const size_t num_arguments = arguments.size();
103assert(num_arguments >= 2);
104
105auto col_res = ColumnString::create();
106std::vector<const ColumnString::Chars *> data(num_arguments);
107std::vector<const ColumnString::Offsets *> offsets(num_arguments);
108std::vector<size_t> fixed_string_sizes(num_arguments);
109std::vector<std::optional<String>> constant_strings(num_arguments);
110std::vector<ColumnString::MutablePtr> converted_col_ptrs(num_arguments);
111bool has_column_string = false;
112bool has_column_fixed_string = false;
113for (size_t i = 0; i < num_arguments; ++i)
114{
115const ColumnPtr & column = arguments[i].column;
116if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
117{
118has_column_string = true;
119data[i] = &col->getChars();
120offsets[i] = &col->getOffsets();
121}
122else if (const ColumnFixedString * fixed_col = checkAndGetColumn<ColumnFixedString>(column.get()))
123{
124has_column_fixed_string = true;
125data[i] = &fixed_col->getChars();
126fixed_string_sizes[i] = fixed_col->getN();
127}
128else if (const ColumnConst * const_col = checkAndGetColumnConstStringOrFixedString(column.get()))
129{
130constant_strings[i] = const_col->getValue<String>();
131}
132else
133{
134/// A non-String/non-FixedString-type argument: use the default serialization to convert it to String
135auto full_column = column->convertToFullIfNeeded();
136auto serialization = arguments[i].type->getDefaultSerialization();
137auto converted_col_str = ColumnString::create();
138ColumnStringHelpers::WriteHelper write_helper(*converted_col_str, column->size());
139auto & write_buffer = write_helper.getWriteBuffer();
140FormatSettings format_settings;
141for (size_t row = 0; row < column->size(); ++row)
142{
143serialization->serializeText(*full_column, row, write_buffer, format_settings);
144write_helper.rowWritten();
145}
146write_helper.finalize();
147
148/// Keep the pointer alive
149converted_col_ptrs[i] = std::move(converted_col_str);
150
151/// Same as the normal `ColumnString` branch
152has_column_string = true;
153data[i] = &converted_col_ptrs[i]->getChars();
154offsets[i] = &converted_col_ptrs[i]->getOffsets();
155}
156}
157
158String pattern;
159pattern.reserve(2 * num_arguments);
160
161for (size_t i = 0; i < num_arguments; ++i)
162pattern += "{}";
163
164FormatStringImpl::formatExecute(
165has_column_string,
166has_column_fixed_string,
167std::move(pattern),
168data,
169offsets,
170fixed_string_sizes,
171constant_strings,
172col_res->getChars(),
173col_res->getOffsets(),
174input_rows_count);
175
176return col_res;
177}
178};
179
180
/// Name tag for ConcatImpl: supplies the function name "concat".
struct NameConcat
{
    static constexpr const char * name = "concat";
};
/// Name tag for ConcatImpl: supplies the function name "concatAssumeInjective".
struct NameConcatAssumeInjective
{
    static constexpr const char * name = "concatAssumeInjective";
};
189
/// `concat`: ConcatImpl whose isInjective() reports false.
190using FunctionConcat = ConcatImpl<NameConcat, false>;
/// `concatAssumeInjective`: the same implementation, but isInjective() reports true.
191using FunctionConcatAssumeInjective = ConcatImpl<NameConcatAssumeInjective, true>;
192
193
194/// Works with arrays via `arrayConcat`, maps via `mapConcat`, and tuples via `tupleConcat`.
195/// Additionally, allows concatenation of arbitrary types that can be cast to string using the corresponding default serialization.
196class ConcatOverloadResolver : public IFunctionOverloadResolver
197{
198public:
199static constexpr auto name = "concat";
200static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<ConcatOverloadResolver>(context); }
201
202explicit ConcatOverloadResolver(ContextPtr context_) : context(context_) { }
203
204String getName() const override { return name; }
205size_t getNumberOfArguments() const override { return 0; }
206bool isVariadic() const override { return true; }
207
208FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
209{
210if (arguments.size() == 1)
211return FunctionFactory::instance().getImpl("toString", context)->build(arguments);
212if (std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
213return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
214if (std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
215return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
216if (std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
217return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments);
218return std::make_unique<FunctionToFunctionBaseAdaptor>(
219FunctionConcat::create(context),
220collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
221return_type);
222}
223
224DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
225{
226if (arguments.empty())
227throw Exception(
228ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
229"Number of arguments for function {} doesn't match: passed {}, should be at least 1.",
230getName(),
231arguments.size());
232
233/// We always return Strings from concat, even if arguments were fixed strings.
234return std::make_shared<DataTypeString>();
235}
236
237private:
238ContextPtr context;
239};
240
241}
242
/// Registers the functions defined in this file with the function factory.
243REGISTER_FUNCTION(Concat)
244{
/// `concat` is registered through its overload resolver, with the CaseInsensitive flag.
245factory.registerFunction<ConcatOverloadResolver>({}, FunctionFactory::CaseInsensitive);
/// `concatAssumeInjective` is registered directly, with the default registration arguments.
246factory.registerFunction<FunctionConcatAssumeInjective>();
247}
248
249}
250