ClickHouse

Форк
0
393 строки · 15.2 Кб
1
#include <Functions/IFunction.h>
2
#include <Functions/FunctionFactory.h>
3
#include <Functions/FunctionHelpers.h>
4
#include <DataTypes/DataTypeMap.h>
5
#include <DataTypes/DataTypesNumber.h>
6
#include <DataTypes/DataTypeArray.h>
7
#include <DataTypes/DataTypeTuple.h>
8
#include <DataTypes/getLeastSupertype.h>
9
#include <Columns/ColumnMap.h>
10
#include <Interpreters/castColumn.h>
11
#include <Interpreters/Context.h>
12
#include <Common/HashTable/HashSet.h>
13

14

15
namespace DB
16
{
17
/// Error codes thrown by the map functions in this file; they are only declared here.
namespace ErrorCodes
{
    /// Argument type / column representation problems.
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;

    /// Argument count / array size problems.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
24

25
namespace
26
{
27

28
// map(x, y, ...) is a function that allows you to make key-value pair
29
class FunctionMap : public IFunction
30
{
31
public:
32
    static constexpr auto name = "map";
33

34
    explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {}
35

36
    static FunctionPtr create(ContextPtr context)
37
    {
38
        return std::make_shared<FunctionMap>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
39
    }
40

41
    String getName() const override
42
    {
43
        return name;
44
    }
45

46
    bool isVariadic() const override
47
    {
48
        return true;
49
    }
50

51
    size_t getNumberOfArguments() const override
52
    {
53
        return 0;
54
    }
55

56
    bool isInjective(const ColumnsWithTypeAndName &) const override
57
    {
58
        return true;
59
    }
60

61
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
62

63
    bool useDefaultImplementationForNulls() const override { return false; }
64
    /// map(..., Nothing) -> Map(..., Nothing)
65
    bool useDefaultImplementationForNothing() const override { return false; }
66
    bool useDefaultImplementationForConstants() const override { return true; }
67
    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
68

69
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
70
    {
71
        if (arguments.size() % 2 != 0)
72
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
73
                "Function {} requires even number of arguments, but {} given", getName(), arguments.size());
74

75
        DataTypes keys, values;
76
        for (size_t i = 0; i < arguments.size(); i += 2)
77
        {
78
            keys.emplace_back(arguments[i]);
79
            values.emplace_back(arguments[i + 1]);
80
        }
81

82
        DataTypes tmp;
83
        if (use_variant_as_common_type)
84
        {
85
            tmp.emplace_back(getLeastSupertypeOrVariant(keys));
86
            tmp.emplace_back(getLeastSupertypeOrVariant(values));
87
        }
88
        else
89
        {
90
            tmp.emplace_back(getLeastSupertype(keys));
91
            tmp.emplace_back(getLeastSupertype(values));
92
        }
93
        return std::make_shared<DataTypeMap>(tmp);
94
    }
95

96
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
97
    {
98
        size_t num_elements = arguments.size();
99

100
        if (num_elements == 0)
101
            return result_type->createColumnConstWithDefaultValue(input_rows_count);
102

103
        const auto & result_type_map = static_cast<const DataTypeMap &>(*result_type);
104
        const DataTypePtr & key_type = result_type_map.getKeyType();
105
        const DataTypePtr & value_type = result_type_map.getValueType();
106

107
        Columns columns_holder(num_elements);
108
        ColumnRawPtrs column_ptrs(num_elements);
109

110
        for (size_t i = 0; i < num_elements; ++i)
111
        {
112
            const auto & arg = arguments[i];
113
            const auto to_type = i % 2 == 0 ? key_type : value_type;
114

115
            ColumnPtr preprocessed_column = castColumn(arg, to_type);
116
            preprocessed_column = preprocessed_column->convertToFullColumnIfConst();
117

118
            columns_holder[i] = std::move(preprocessed_column);
119
            column_ptrs[i] = columns_holder[i].get();
120
        }
121

122
        /// Create and fill the result map.
123

124
        MutableColumnPtr keys_data = key_type->createColumn();
125
        MutableColumnPtr values_data = value_type->createColumn();
126
        MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
127

128
        size_t total_elements = input_rows_count * num_elements / 2;
129
        keys_data->reserve(total_elements);
130
        values_data->reserve(total_elements);
131
        offsets->reserve(input_rows_count);
132

133
        IColumn::Offset current_offset = 0;
134
        for (size_t i = 0; i < input_rows_count; ++i)
135
        {
136
            for (size_t j = 0; j < num_elements; j += 2)
137
            {
138
                keys_data->insertFrom(*column_ptrs[j], i);
139
                values_data->insertFrom(*column_ptrs[j + 1], i);
140
            }
141

142
            current_offset += num_elements / 2;
143
            offsets->insert(current_offset);
144
        }
145

146
        auto nested_column = ColumnArray::create(
147
            ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}),
148
            std::move(offsets));
149

150
        return ColumnMap::create(nested_column);
151
    }
152

153
private:
154
    bool use_variant_as_common_type = false;
155
};
156

157
/// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays
158
class FunctionMapFromArrays : public IFunction
159
{
160
public:
161
    static constexpr auto name = "mapFromArrays";
162

163
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMapFromArrays>(); }
164
    String getName() const override { return name; }
165

166
    size_t getNumberOfArguments() const override { return 2; }
167

168
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
169
    bool useDefaultImplementationForNulls() const override { return false; }
170
    bool useDefaultImplementationForConstants() const override { return true; }
171

172
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
173
    {
174
        if (arguments.size() != 2)
175
            throw Exception(
176
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
177
                "Function {} requires 2 arguments, but {} given",
178
                getName(),
179
                arguments.size());
180

181
        /// The first argument should always be Array.
182
        /// Because key type can not be nested type of Map, which is Tuple
183
        DataTypePtr key_type;
184
        if (const auto * keys_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()))
185
            key_type = keys_type->getNestedType();
186
        else
187
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an Array", getName());
188

189
        DataTypePtr value_type;
190
        if (const auto * value_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get()))
191
            value_type = value_array_type->getNestedType();
192
        else if (const auto * value_map_type = checkAndGetDataType<DataTypeMap>(arguments[1].get()))
193
            value_type = std::make_shared<DataTypeTuple>(value_map_type->getKeyValueTypes());
194
        else
195
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be Array or Map", getName());
196

197
        DataTypes key_value_types{key_type, value_type};
198
        return std::make_shared<DataTypeMap>(key_value_types);
199
    }
200

201
    ColumnPtr executeImpl(
202
        const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t /* input_rows_count */) const override
203
    {
204
        bool is_keys_const = isColumnConst(*arguments[0].column);
205
        ColumnPtr holder_keys;
206
        const ColumnArray * col_keys;
207
        if (is_keys_const)
208
        {
209
            holder_keys = arguments[0].column->convertToFullColumnIfConst();
210
            col_keys = checkAndGetColumn<ColumnArray>(holder_keys.get());
211
        }
212
        else
213
        {
214
            col_keys = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
215
        }
216

217
        if (!col_keys)
218
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The first argument of function {} must be Array", getName());
219

220
        bool is_values_const = isColumnConst(*arguments[1].column);
221
        ColumnPtr holder_values;
222
        if (is_values_const)
223
            holder_values = arguments[1].column->convertToFullColumnIfConst();
224
        else
225
            holder_values = arguments[1].column;
226

227
        const ColumnArray * col_values;
228
        if (const auto * col_values_array = checkAndGetColumn<ColumnArray>(holder_values.get()))
229
            col_values = col_values_array;
230
        else if (const auto * col_values_map = checkAndGetColumn<ColumnMap>(holder_values.get()))
231
            col_values = &col_values_map->getNestedColumn();
232
        else
233
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second arguments of function {} must be Array or Map", getName());
234

235
        if (!col_keys->hasEqualOffsets(*col_values))
236
            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments for function {} must have equal sizes", getName());
237

238
        const auto & data_keys = col_keys->getDataPtr();
239
        const auto & data_values = col_values->getDataPtr();
240
        const auto & offsets = col_keys->getOffsetsPtr();
241
        auto nested_column = ColumnArray::create(ColumnTuple::create(Columns{data_keys, data_values}), offsets);
242
        return ColumnMap::create(nested_column);
243
    }
244
};
245

246
class FunctionMapUpdate : public IFunction
247
{
248
public:
249
    static constexpr auto name = "mapUpdate";
250
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMapUpdate>(); }
251

252
    String getName() const override
253
    {
254
        return name;
255
    }
256

257
    size_t getNumberOfArguments() const override { return 2; }
258

259
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
260

261
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
262
    {
263
        if (arguments.size() != 2)
264
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
265
                "Number of arguments for function {} doesn't match: passed {}, should be 2",
266
                getName(), arguments.size());
267

268
        const auto * left = checkAndGetDataType<DataTypeMap>(arguments[0].type.get());
269
        const auto * right = checkAndGetDataType<DataTypeMap>(arguments[1].type.get());
270

271
        if (!left || !right)
272
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
273
                "The two arguments for function {} must be both Map type", getName());
274

275
        if (!left->getKeyType()->equals(*right->getKeyType()) || !left->getValueType()->equals(*right->getValueType()))
276
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
277
                "The Key And Value type of Map for function {} must be the same", getName());
278

279
        return std::make_shared<DataTypeMap>(left->getKeyType(), left->getValueType());
280
    }
281

282
    bool useDefaultImplementationForConstants() const override { return true; }
283

284
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
285
    {
286
        bool is_left_const = isColumnConst(*arguments[0].column);
287
        bool is_right_const = isColumnConst(*arguments[1].column);
288

289
        const auto * map_column_left = is_left_const
290
            ? checkAndGetColumnConstData<ColumnMap>(arguments[0].column.get())
291
            : checkAndGetColumn<ColumnMap>(arguments[0].column.get());
292

293
        const auto * map_column_right = is_right_const
294
            ? checkAndGetColumnConstData<ColumnMap>(arguments[1].column.get())
295
            : checkAndGetColumn<ColumnMap>(arguments[1].column.get());
296

297
        if (!map_column_left || !map_column_right)
298
            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
299
                "Arguments for function {} must be maps, got {} and {} instead",
300
                getName(), arguments[0].column->getName(), arguments[1].column->getName());
301

302
        const auto & nested_column_left = map_column_left->getNestedColumn();
303
        const auto & keys_data_left = map_column_left->getNestedData().getColumn(0);
304
        const auto & values_data_left = map_column_left->getNestedData().getColumn(1);
305
        const auto & offsets_left = nested_column_left.getOffsets();
306

307
        const auto & nested_column_right = map_column_right->getNestedColumn();
308
        const auto & keys_data_right = map_column_right->getNestedData().getColumn(0);
309
        const auto & values_data_right = map_column_right->getNestedData().getColumn(1);
310
        const auto & offsets_right = nested_column_right.getOffsets();
311

312
        auto result_keys = keys_data_left.cloneEmpty();
313
        auto result_values = values_data_left.cloneEmpty();
314

315
        size_t size_to_reserve = keys_data_right.size() + (keys_data_left.size() - keys_data_right.size());
316

317
        result_keys->reserve(size_to_reserve);
318
        result_values->reserve(size_to_reserve);
319

320
        auto result_offsets = ColumnVector<IColumn::Offset>::create(input_rows_count);
321
        auto & result_offsets_data = result_offsets->getData();
322

323
        using Set = HashSetWithStackMemory<StringRef, StringRefHash, 4>;
324

325
        Set right_keys_const;
326
        if (is_right_const)
327
        {
328
            for (size_t i = 0; i < keys_data_right.size(); ++i)
329
                right_keys_const.insert(keys_data_right.getDataAt(i));
330
        }
331

332
        IColumn::Offset current_offset = 0;
333
        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
334
        {
335
            size_t left_from = is_left_const ? 0 : offsets_left[row_idx - 1];
336
            size_t left_to = is_left_const ? offsets_left[0] : offsets_left[row_idx];
337

338
            size_t right_from = is_right_const ? 0 : offsets_right[row_idx - 1];
339
            size_t right_to = is_right_const ? offsets_right[0] : offsets_right[row_idx];
340

341
            auto execute_row = [&](const auto & set)
342
            {
343
                for (size_t i = left_from; i < left_to; ++i)
344
                {
345
                    if (!set.find(keys_data_left.getDataAt(i)))
346
                    {
347
                        result_keys->insertFrom(keys_data_left, i);
348
                        result_values->insertFrom(values_data_left, i);
349
                        ++current_offset;
350
                    }
351
                }
352
            };
353

354
            if (is_right_const)
355
            {
356
                execute_row(right_keys_const);
357
            }
358
            else
359
            {
360
                Set right_keys;
361
                for (size_t i = right_from; i < right_to; ++i)
362
                    right_keys.insert(keys_data_right.getDataAt(i));
363

364
                execute_row(right_keys);
365
            }
366

367
            size_t right_map_size = right_to - right_from;
368
            result_keys->insertRangeFrom(keys_data_right, right_from, right_map_size);
369
            result_values->insertRangeFrom(values_data_right, right_from, right_map_size);
370

371
            current_offset += right_map_size;
372
            result_offsets_data[row_idx] = current_offset;
373
        }
374

375
        auto nested_column = ColumnArray::create(
376
            ColumnTuple::create(Columns{std::move(result_keys), std::move(result_values)}),
377
            std::move(result_offsets));
378

379
        return ColumnMap::create(nested_column);
380
    }
381
};
382

383
}
384

385
/// Registers the map functions defined in this file in the function factory.
REGISTER_FUNCTION(Map)
{
    factory.registerFunction<FunctionMap>();

    /// mapFromArrays is additionally reachable through the alias MAP_FROM_ARRAYS.
    factory.registerFunction<FunctionMapFromArrays>();
    factory.registerAlias("MAP_FROM_ARRAYS", "mapFromArrays");

    factory.registerFunction<FunctionMapUpdate>();
}
392

393
}
394

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.