ClickHouse

Форк
0
/
KqlArraySort.cpp 
264 строки · 10.6 Кб
1
#include <Columns/ColumnArray.h>
2
#include <Columns/ColumnTuple.h>
3
#include <DataTypes/DataTypeArray.h>
4
#include <DataTypes/DataTypeLowCardinality.h>
5
#include <DataTypes/DataTypeTuple.h>
6
#include <Functions/FunctionFactory.h>
7
#include <Functions/FunctionHelpers.h>
8
#include <Functions/Kusto/KqlFunctionBase.h>
9

10
namespace DB
11
{
12
/// Error codes thrown by the functions in this file; only declared here.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
}
18

19
template <typename Name, bool is_desc>
20
class FunctionKqlArraySort : public KqlFunctionBase
21
{
22
public:
23
    static constexpr auto name = Name::name;
24
    explicit FunctionKqlArraySort(ContextPtr context_) : context(context_) { }
25
    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionKqlArraySort>(context); }
26

27
    String getName() const override { return name; }
28

29
    bool isVariadic() const override { return true; }
30
    size_t getNumberOfArguments() const override { return 0; }
31
    bool useDefaultImplementationForConstants() const override { return true; }
32
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
33

34
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
35
    {
36
        if (arguments.empty())
37
            throw Exception(
38
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
39
                "Function {} needs at least one argument; passed {}.",
40
                getName(),
41
                arguments.size());
42

43
        auto array_count = arguments.size();
44

45
        if (!isArray(arguments.at(array_count - 1).type))
46
            --array_count;
47

48
        DataTypes nested_types;
49
        for (size_t index = 0; index < array_count; ++index)
50
        {
51
            const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[index].type.get());
52
            if (!array_type)
53
                throw Exception(
54
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
55
                    "Argument {} of function {} must be array. Found {} instead.",
56
                    index + 1,
57
                    getName(),
58
                    arguments[0].type->getName());
59

60
            nested_types.emplace_back(array_type->getNestedType());
61
        }
62

63
        DataTypes data_types(array_count);
64

65
        for (size_t i = 0; i < array_count; ++i)
66
            data_types[i] = std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
67

68
        return std::make_shared<DataTypeTuple>(data_types);
69
    }
70

71
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
72
    {
73
        size_t array_count = arguments.size();
74
        const auto & last_arg = arguments[array_count - 1];
75

76
        size_t input_rows_count_local = input_rows_count;
77

78
        bool null_last = true;
79
        if (!isArray(last_arg.type))
80
        {
81
            --array_count;
82
            null_last = check_condition(last_arg, context, input_rows_count_local);
83
        }
84

85
        ColumnsWithTypeAndName new_args;
86
        ColumnPtr first_array_column;
87
        std::unordered_set<size_t> null_indices;
88
        DataTypes nested_types;
89

90
        String sort_function = is_desc ? "arrayReverseSort" : "arraySort";
91

92
        for (size_t i = 0; i < array_count; ++i)
93
        {
94
            ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst();
95

96
            const ColumnArray * column_array = checkAndGetColumn<ColumnArray>(holder.get());
97
            const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].type.get());
98

99
            if (!column_array)
100
                throw Exception(
101
                    ErrorCodes::ILLEGAL_COLUMN,
102
                    "Argument {} of function {} must be array. Found column {} instead.",
103
                    i + 1,
104
                    getName(),
105
                    holder->getName());
106

107
            nested_types.emplace_back(makeNullable(array_type->getNestedType()));
108
            if (i == 0)
109
            {
110
                first_array_column = holder;
111
                new_args.push_back(arguments[i]);
112
            }
113
            else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
114
            {
115
                null_indices.insert(i);
116
            }
117
            else
118
                new_args.push_back(arguments[i]);
119
        }
120

121
        auto zipped
122
            = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local);
123

124
        ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared<DataTypeArray>(result_type), "zipped"}});
125
        auto sorted_tuple
126
            = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local);
127

128
        auto null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
129

130
        Columns tuple_columns(array_count);
131
        size_t sorted_index = 0;
132
        for (size_t i = 0; i < array_count; ++i)
133
        {
134
            if (null_indices.contains(i))
135
            {
136
                auto fun_array = FunctionFactory::instance().get("array", context);
137

138
                DataTypePtr arg_type
139
                    = std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
140

141
                ColumnsWithTypeAndName null_array_arg({
142
                    {null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"},
143
                });
144

145
                tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local);
146
                tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst();
147
            }
148
            else
149
            {
150
                ColumnsWithTypeAndName untuple_args(
151
                    {{ColumnWithTypeAndName(sorted_tuple, std::make_shared<DataTypeArray>(result_type), "sorted")},
152
                     {DataTypeUInt8().createColumnConst(1, toField(UInt8(sorted_index + 1))), std::make_shared<DataTypeUInt8>(), ""}});
153
                auto tuple_coulmn = FunctionFactory::instance()
154
                                        .get("tupleElement", context)
155
                                        ->build(untuple_args)
156
                                        ->execute(untuple_args, result_type, input_rows_count_local);
157

158
                auto out_tmp = ColumnArray::create(nested_types[i]->createColumn());
159

160
                size_t array_size = tuple_coulmn->size();
161
                const auto * arr = checkAndGetColumn<ColumnArray>(tuple_coulmn.get());
162

163
                for (size_t j = 0; j < array_size; ++j)
164
                {
165
                    Field arr_field;
166
                    arr->get(j, arr_field);
167
                    out_tmp->insert(arr_field);
168
                }
169

170
                tuple_columns[i] = std::move(out_tmp);
171

172
                ++sorted_index;
173
            }
174
        }
175

176
        if (!null_last)
177
        {
178
            Columns adjusted_columns(array_count);
179

180
            ColumnWithTypeAndName arg_of_index{nullptr, std::make_shared<DataTypeArray>(nested_types[0]), "array"};
181
            arg_of_index.column = tuple_columns[0];
182

183
            auto inside_null_type = nested_types[0];
184
            ColumnsWithTypeAndName indexof_args({
185
                arg_of_index,
186
                {inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"},
187
            });
188

189
            auto null_index_datetype = std::make_shared<DataTypeUInt64>();
190

191
            ColumnWithTypeAndName slice_index{nullptr, null_index_datetype, ""};
192
            slice_index.column = FunctionFactory::instance()
193
                                     .get("indexOf", context)
194
                                     ->build(indexof_args)
195
                                     ->execute(indexof_args, result_type, input_rows_count_local);
196

197
            auto null_index_in_array = slice_index.column->get64(0);
198
            if (null_index_in_array > 0)
199
            {
200
                ColumnWithTypeAndName slice_index_len{nullptr, null_index_datetype, ""};
201
                slice_index_len.column = DataTypeUInt64().createColumnConst(1, toField(UInt64(null_index_in_array - 1)));
202

203
                auto fun_slice = FunctionFactory::instance().get("arraySlice", context);
204

205
                for (size_t i = 0; i < array_count; ++i)
206
                {
207
                    if (null_indices.contains(i))
208
                    {
209
                        adjusted_columns[i] = std::move(tuple_columns[i]);
210
                    }
211
                    else
212
                    {
213
                        DataTypePtr arg_type = std::make_shared<DataTypeArray>(nested_types[i]);
214

215
                        ColumnsWithTypeAndName slice_args_left(
216
                            {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")},
217
                             {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared<DataTypeUInt8>(), ""},
218
                             slice_index_len});
219

220
                        ColumnsWithTypeAndName slice_args_right(
221
                            {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index});
222
                        ColumnWithTypeAndName arr_left{
223
                            fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""};
224
                        ColumnWithTypeAndName arr_right{
225
                            fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""};
226

227
                        ColumnsWithTypeAndName arr_cancat({arr_right, arr_left});
228
                        auto out_tmp = FunctionFactory::instance()
229
                                           .get("arrayConcat", context)
230
                                           ->build(arr_cancat)
231
                                           ->execute(arr_cancat, arg_type, input_rows_count_local);
232
                        adjusted_columns[i] = std::move(out_tmp);
233
                    }
234
                }
235
                return ColumnTuple::create(adjusted_columns);
236
            }
237
        }
238
        return ColumnTuple::create(tuple_columns);
239
    }
240

241
private:
242
    ContextPtr context;
243
};
244

245
/// Name tag for the ascending variant; `name` is the function name used at registration.
struct NameKqlArraySortAsc
{
    static constexpr auto name = "kql_array_sort_asc";
};
249

250
/// Name tag for the descending variant; `name` is the function name used at registration.
struct NameKqlArraySortDesc
{
    static constexpr auto name = "kql_array_sort_desc";
};
254

255
/// Concrete instantiations: ascending uses is_desc = false, descending uses is_desc = true.
using FunctionKqlArraySortAsc = FunctionKqlArraySort<NameKqlArraySortAsc, false>;
using FunctionKqlArraySortDesc = FunctionKqlArraySort<NameKqlArraySortDesc, true>;
257

258
/// Registers both sort variants in the function factory.
REGISTER_FUNCTION(KqlArraySort)
{
    factory.registerFunction<FunctionKqlArraySortAsc>();
    factory.registerFunction<FunctionKqlArraySortDesc>();
}
263

264
}
265

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.