ClickHouse
264 строки · 10.6 Кб
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/Kusto/KqlFunctionBase.h>

#include <memory>
#include <unordered_set>
9
10namespace DB
11{
/// Error codes thrown by the KQL array sort functions; the values are defined elsewhere.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
}
18
19template <typename Name, bool is_desc>
20class FunctionKqlArraySort : public KqlFunctionBase
21{
22public:
23static constexpr auto name = Name::name;
24explicit FunctionKqlArraySort(ContextPtr context_) : context(context_) { }
25static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionKqlArraySort>(context); }
26
27String getName() const override { return name; }
28
29bool isVariadic() const override { return true; }
30size_t getNumberOfArguments() const override { return 0; }
31bool useDefaultImplementationForConstants() const override { return true; }
32bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
33
34DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
35{
36if (arguments.empty())
37throw Exception(
38ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
39"Function {} needs at least one argument; passed {}.",
40getName(),
41arguments.size());
42
43auto array_count = arguments.size();
44
45if (!isArray(arguments.at(array_count - 1).type))
46--array_count;
47
48DataTypes nested_types;
49for (size_t index = 0; index < array_count; ++index)
50{
51const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[index].type.get());
52if (!array_type)
53throw Exception(
54ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
55"Argument {} of function {} must be array. Found {} instead.",
56index + 1,
57getName(),
58arguments[0].type->getName());
59
60nested_types.emplace_back(array_type->getNestedType());
61}
62
63DataTypes data_types(array_count);
64
65for (size_t i = 0; i < array_count; ++i)
66data_types[i] = std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
67
68return std::make_shared<DataTypeTuple>(data_types);
69}
70
71ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
72{
73size_t array_count = arguments.size();
74const auto & last_arg = arguments[array_count - 1];
75
76size_t input_rows_count_local = input_rows_count;
77
78bool null_last = true;
79if (!isArray(last_arg.type))
80{
81--array_count;
82null_last = check_condition(last_arg, context, input_rows_count_local);
83}
84
85ColumnsWithTypeAndName new_args;
86ColumnPtr first_array_column;
87std::unordered_set<size_t> null_indices;
88DataTypes nested_types;
89
90String sort_function = is_desc ? "arrayReverseSort" : "arraySort";
91
92for (size_t i = 0; i < array_count; ++i)
93{
94ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst();
95
96const ColumnArray * column_array = checkAndGetColumn<ColumnArray>(holder.get());
97const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].type.get());
98
99if (!column_array)
100throw Exception(
101ErrorCodes::ILLEGAL_COLUMN,
102"Argument {} of function {} must be array. Found column {} instead.",
103i + 1,
104getName(),
105holder->getName());
106
107nested_types.emplace_back(makeNullable(array_type->getNestedType()));
108if (i == 0)
109{
110first_array_column = holder;
111new_args.push_back(arguments[i]);
112}
113else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
114{
115null_indices.insert(i);
116}
117else
118new_args.push_back(arguments[i]);
119}
120
121auto zipped
122= FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local);
123
124ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared<DataTypeArray>(result_type), "zipped"}});
125auto sorted_tuple
126= FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local);
127
128auto null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
129
130Columns tuple_columns(array_count);
131size_t sorted_index = 0;
132for (size_t i = 0; i < array_count; ++i)
133{
134if (null_indices.contains(i))
135{
136auto fun_array = FunctionFactory::instance().get("array", context);
137
138DataTypePtr arg_type
139= std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
140
141ColumnsWithTypeAndName null_array_arg({
142{null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"},
143});
144
145tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local);
146tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst();
147}
148else
149{
150ColumnsWithTypeAndName untuple_args(
151{{ColumnWithTypeAndName(sorted_tuple, std::make_shared<DataTypeArray>(result_type), "sorted")},
152{DataTypeUInt8().createColumnConst(1, toField(UInt8(sorted_index + 1))), std::make_shared<DataTypeUInt8>(), ""}});
153auto tuple_coulmn = FunctionFactory::instance()
154.get("tupleElement", context)
155->build(untuple_args)
156->execute(untuple_args, result_type, input_rows_count_local);
157
158auto out_tmp = ColumnArray::create(nested_types[i]->createColumn());
159
160size_t array_size = tuple_coulmn->size();
161const auto * arr = checkAndGetColumn<ColumnArray>(tuple_coulmn.get());
162
163for (size_t j = 0; j < array_size; ++j)
164{
165Field arr_field;
166arr->get(j, arr_field);
167out_tmp->insert(arr_field);
168}
169
170tuple_columns[i] = std::move(out_tmp);
171
172++sorted_index;
173}
174}
175
176if (!null_last)
177{
178Columns adjusted_columns(array_count);
179
180ColumnWithTypeAndName arg_of_index{nullptr, std::make_shared<DataTypeArray>(nested_types[0]), "array"};
181arg_of_index.column = tuple_columns[0];
182
183auto inside_null_type = nested_types[0];
184ColumnsWithTypeAndName indexof_args({
185arg_of_index,
186{inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"},
187});
188
189auto null_index_datetype = std::make_shared<DataTypeUInt64>();
190
191ColumnWithTypeAndName slice_index{nullptr, null_index_datetype, ""};
192slice_index.column = FunctionFactory::instance()
193.get("indexOf", context)
194->build(indexof_args)
195->execute(indexof_args, result_type, input_rows_count_local);
196
197auto null_index_in_array = slice_index.column->get64(0);
198if (null_index_in_array > 0)
199{
200ColumnWithTypeAndName slice_index_len{nullptr, null_index_datetype, ""};
201slice_index_len.column = DataTypeUInt64().createColumnConst(1, toField(UInt64(null_index_in_array - 1)));
202
203auto fun_slice = FunctionFactory::instance().get("arraySlice", context);
204
205for (size_t i = 0; i < array_count; ++i)
206{
207if (null_indices.contains(i))
208{
209adjusted_columns[i] = std::move(tuple_columns[i]);
210}
211else
212{
213DataTypePtr arg_type = std::make_shared<DataTypeArray>(nested_types[i]);
214
215ColumnsWithTypeAndName slice_args_left(
216{{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")},
217{DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared<DataTypeUInt8>(), ""},
218slice_index_len});
219
220ColumnsWithTypeAndName slice_args_right(
221{{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index});
222ColumnWithTypeAndName arr_left{
223fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""};
224ColumnWithTypeAndName arr_right{
225fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""};
226
227ColumnsWithTypeAndName arr_cancat({arr_right, arr_left});
228auto out_tmp = FunctionFactory::instance()
229.get("arrayConcat", context)
230->build(arr_cancat)
231->execute(arr_cancat, arg_type, input_rows_count_local);
232adjusted_columns[i] = std::move(out_tmp);
233}
234}
235return ColumnTuple::create(adjusted_columns);
236}
237}
238return ColumnTuple::create(tuple_columns);
239}
240
241private:
242ContextPtr context;
243};
244
/// Name tag for the ascending variant of the KQL array sort function.
struct NameKqlArraySortAsc
{
    static constexpr auto name = "kql_array_sort_asc";
};
249
/// Name tag for the descending variant of the KQL array sort function.
struct NameKqlArraySortDesc
{
    static constexpr auto name = "kql_array_sort_desc";
};
254
255using FunctionKqlArraySortAsc = FunctionKqlArraySort<NameKqlArraySortAsc, false>;
256using FunctionKqlArraySortDesc = FunctionKqlArraySort<NameKqlArraySortDesc, true>;
257
258REGISTER_FUNCTION(KqlArraySort)
259{
260factory.registerFunction<FunctionKqlArraySortAsc>();
261factory.registerFunction<FunctionKqlArraySortDesc>();
262}
263
264}
265