// ClickHouse
// 393 lines · 15.2 KB
1#include <Functions/IFunction.h>
2#include <Functions/FunctionFactory.h>
3#include <Functions/FunctionHelpers.h>
4#include <DataTypes/DataTypeMap.h>
5#include <DataTypes/DataTypesNumber.h>
6#include <DataTypes/DataTypeArray.h>
7#include <DataTypes/DataTypeTuple.h>
8#include <DataTypes/getLeastSupertype.h>
9#include <Columns/ColumnMap.h>
10#include <Interpreters/castColumn.h>
11#include <Interpreters/Context.h>
12#include <Common/HashTable/HashSet.h>
13
14
15namespace DB
16{
17namespace ErrorCodes
18{
19extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
20extern const int ILLEGAL_TYPE_OF_ARGUMENT;
21extern const int SIZES_OF_ARRAYS_DONT_MATCH;
22extern const int ILLEGAL_COLUMN;
23}
24
25namespace
26{
27
28// map(x, y, ...) is a function that allows you to make key-value pair
29class FunctionMap : public IFunction
30{
31public:
32static constexpr auto name = "map";
33
34explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {}
35
36static FunctionPtr create(ContextPtr context)
37{
38return std::make_shared<FunctionMap>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
39}
40
41String getName() const override
42{
43return name;
44}
45
46bool isVariadic() const override
47{
48return true;
49}
50
51size_t getNumberOfArguments() const override
52{
53return 0;
54}
55
56bool isInjective(const ColumnsWithTypeAndName &) const override
57{
58return true;
59}
60
61bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
62
63bool useDefaultImplementationForNulls() const override { return false; }
64/// map(..., Nothing) -> Map(..., Nothing)
65bool useDefaultImplementationForNothing() const override { return false; }
66bool useDefaultImplementationForConstants() const override { return true; }
67bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
68
69DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
70{
71if (arguments.size() % 2 != 0)
72throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
73"Function {} requires even number of arguments, but {} given", getName(), arguments.size());
74
75DataTypes keys, values;
76for (size_t i = 0; i < arguments.size(); i += 2)
77{
78keys.emplace_back(arguments[i]);
79values.emplace_back(arguments[i + 1]);
80}
81
82DataTypes tmp;
83if (use_variant_as_common_type)
84{
85tmp.emplace_back(getLeastSupertypeOrVariant(keys));
86tmp.emplace_back(getLeastSupertypeOrVariant(values));
87}
88else
89{
90tmp.emplace_back(getLeastSupertype(keys));
91tmp.emplace_back(getLeastSupertype(values));
92}
93return std::make_shared<DataTypeMap>(tmp);
94}
95
96ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
97{
98size_t num_elements = arguments.size();
99
100if (num_elements == 0)
101return result_type->createColumnConstWithDefaultValue(input_rows_count);
102
103const auto & result_type_map = static_cast<const DataTypeMap &>(*result_type);
104const DataTypePtr & key_type = result_type_map.getKeyType();
105const DataTypePtr & value_type = result_type_map.getValueType();
106
107Columns columns_holder(num_elements);
108ColumnRawPtrs column_ptrs(num_elements);
109
110for (size_t i = 0; i < num_elements; ++i)
111{
112const auto & arg = arguments[i];
113const auto to_type = i % 2 == 0 ? key_type : value_type;
114
115ColumnPtr preprocessed_column = castColumn(arg, to_type);
116preprocessed_column = preprocessed_column->convertToFullColumnIfConst();
117
118columns_holder[i] = std::move(preprocessed_column);
119column_ptrs[i] = columns_holder[i].get();
120}
121
122/// Create and fill the result map.
123
124MutableColumnPtr keys_data = key_type->createColumn();
125MutableColumnPtr values_data = value_type->createColumn();
126MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
127
128size_t total_elements = input_rows_count * num_elements / 2;
129keys_data->reserve(total_elements);
130values_data->reserve(total_elements);
131offsets->reserve(input_rows_count);
132
133IColumn::Offset current_offset = 0;
134for (size_t i = 0; i < input_rows_count; ++i)
135{
136for (size_t j = 0; j < num_elements; j += 2)
137{
138keys_data->insertFrom(*column_ptrs[j], i);
139values_data->insertFrom(*column_ptrs[j + 1], i);
140}
141
142current_offset += num_elements / 2;
143offsets->insert(current_offset);
144}
145
146auto nested_column = ColumnArray::create(
147ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}),
148std::move(offsets));
149
150return ColumnMap::create(nested_column);
151}
152
153private:
154bool use_variant_as_common_type = false;
155};
156
157/// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays
158class FunctionMapFromArrays : public IFunction
159{
160public:
161static constexpr auto name = "mapFromArrays";
162
163static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMapFromArrays>(); }
164String getName() const override { return name; }
165
166size_t getNumberOfArguments() const override { return 2; }
167
168bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
169bool useDefaultImplementationForNulls() const override { return false; }
170bool useDefaultImplementationForConstants() const override { return true; }
171
172DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
173{
174if (arguments.size() != 2)
175throw Exception(
176ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
177"Function {} requires 2 arguments, but {} given",
178getName(),
179arguments.size());
180
181/// The first argument should always be Array.
182/// Because key type can not be nested type of Map, which is Tuple
183DataTypePtr key_type;
184if (const auto * keys_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()))
185key_type = keys_type->getNestedType();
186else
187throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an Array", getName());
188
189DataTypePtr value_type;
190if (const auto * value_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get()))
191value_type = value_array_type->getNestedType();
192else if (const auto * value_map_type = checkAndGetDataType<DataTypeMap>(arguments[1].get()))
193value_type = std::make_shared<DataTypeTuple>(value_map_type->getKeyValueTypes());
194else
195throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be Array or Map", getName());
196
197DataTypes key_value_types{key_type, value_type};
198return std::make_shared<DataTypeMap>(key_value_types);
199}
200
201ColumnPtr executeImpl(
202const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t /* input_rows_count */) const override
203{
204bool is_keys_const = isColumnConst(*arguments[0].column);
205ColumnPtr holder_keys;
206const ColumnArray * col_keys;
207if (is_keys_const)
208{
209holder_keys = arguments[0].column->convertToFullColumnIfConst();
210col_keys = checkAndGetColumn<ColumnArray>(holder_keys.get());
211}
212else
213{
214col_keys = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
215}
216
217if (!col_keys)
218throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The first argument of function {} must be Array", getName());
219
220bool is_values_const = isColumnConst(*arguments[1].column);
221ColumnPtr holder_values;
222if (is_values_const)
223holder_values = arguments[1].column->convertToFullColumnIfConst();
224else
225holder_values = arguments[1].column;
226
227const ColumnArray * col_values;
228if (const auto * col_values_array = checkAndGetColumn<ColumnArray>(holder_values.get()))
229col_values = col_values_array;
230else if (const auto * col_values_map = checkAndGetColumn<ColumnMap>(holder_values.get()))
231col_values = &col_values_map->getNestedColumn();
232else
233throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second arguments of function {} must be Array or Map", getName());
234
235if (!col_keys->hasEqualOffsets(*col_values))
236throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments for function {} must have equal sizes", getName());
237
238const auto & data_keys = col_keys->getDataPtr();
239const auto & data_values = col_values->getDataPtr();
240const auto & offsets = col_keys->getOffsetsPtr();
241auto nested_column = ColumnArray::create(ColumnTuple::create(Columns{data_keys, data_values}), offsets);
242return ColumnMap::create(nested_column);
243}
244};
245
246class FunctionMapUpdate : public IFunction
247{
248public:
249static constexpr auto name = "mapUpdate";
250static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMapUpdate>(); }
251
252String getName() const override
253{
254return name;
255}
256
257size_t getNumberOfArguments() const override { return 2; }
258
259bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
260
261DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
262{
263if (arguments.size() != 2)
264throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
265"Number of arguments for function {} doesn't match: passed {}, should be 2",
266getName(), arguments.size());
267
268const auto * left = checkAndGetDataType<DataTypeMap>(arguments[0].type.get());
269const auto * right = checkAndGetDataType<DataTypeMap>(arguments[1].type.get());
270
271if (!left || !right)
272throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
273"The two arguments for function {} must be both Map type", getName());
274
275if (!left->getKeyType()->equals(*right->getKeyType()) || !left->getValueType()->equals(*right->getValueType()))
276throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
277"The Key And Value type of Map for function {} must be the same", getName());
278
279return std::make_shared<DataTypeMap>(left->getKeyType(), left->getValueType());
280}
281
282bool useDefaultImplementationForConstants() const override { return true; }
283
284ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
285{
286bool is_left_const = isColumnConst(*arguments[0].column);
287bool is_right_const = isColumnConst(*arguments[1].column);
288
289const auto * map_column_left = is_left_const
290? checkAndGetColumnConstData<ColumnMap>(arguments[0].column.get())
291: checkAndGetColumn<ColumnMap>(arguments[0].column.get());
292
293const auto * map_column_right = is_right_const
294? checkAndGetColumnConstData<ColumnMap>(arguments[1].column.get())
295: checkAndGetColumn<ColumnMap>(arguments[1].column.get());
296
297if (!map_column_left || !map_column_right)
298throw Exception(ErrorCodes::ILLEGAL_COLUMN,
299"Arguments for function {} must be maps, got {} and {} instead",
300getName(), arguments[0].column->getName(), arguments[1].column->getName());
301
302const auto & nested_column_left = map_column_left->getNestedColumn();
303const auto & keys_data_left = map_column_left->getNestedData().getColumn(0);
304const auto & values_data_left = map_column_left->getNestedData().getColumn(1);
305const auto & offsets_left = nested_column_left.getOffsets();
306
307const auto & nested_column_right = map_column_right->getNestedColumn();
308const auto & keys_data_right = map_column_right->getNestedData().getColumn(0);
309const auto & values_data_right = map_column_right->getNestedData().getColumn(1);
310const auto & offsets_right = nested_column_right.getOffsets();
311
312auto result_keys = keys_data_left.cloneEmpty();
313auto result_values = values_data_left.cloneEmpty();
314
315size_t size_to_reserve = keys_data_right.size() + (keys_data_left.size() - keys_data_right.size());
316
317result_keys->reserve(size_to_reserve);
318result_values->reserve(size_to_reserve);
319
320auto result_offsets = ColumnVector<IColumn::Offset>::create(input_rows_count);
321auto & result_offsets_data = result_offsets->getData();
322
323using Set = HashSetWithStackMemory<StringRef, StringRefHash, 4>;
324
325Set right_keys_const;
326if (is_right_const)
327{
328for (size_t i = 0; i < keys_data_right.size(); ++i)
329right_keys_const.insert(keys_data_right.getDataAt(i));
330}
331
332IColumn::Offset current_offset = 0;
333for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
334{
335size_t left_from = is_left_const ? 0 : offsets_left[row_idx - 1];
336size_t left_to = is_left_const ? offsets_left[0] : offsets_left[row_idx];
337
338size_t right_from = is_right_const ? 0 : offsets_right[row_idx - 1];
339size_t right_to = is_right_const ? offsets_right[0] : offsets_right[row_idx];
340
341auto execute_row = [&](const auto & set)
342{
343for (size_t i = left_from; i < left_to; ++i)
344{
345if (!set.find(keys_data_left.getDataAt(i)))
346{
347result_keys->insertFrom(keys_data_left, i);
348result_values->insertFrom(values_data_left, i);
349++current_offset;
350}
351}
352};
353
354if (is_right_const)
355{
356execute_row(right_keys_const);
357}
358else
359{
360Set right_keys;
361for (size_t i = right_from; i < right_to; ++i)
362right_keys.insert(keys_data_right.getDataAt(i));
363
364execute_row(right_keys);
365}
366
367size_t right_map_size = right_to - right_from;
368result_keys->insertRangeFrom(keys_data_right, right_from, right_map_size);
369result_values->insertRangeFrom(values_data_right, right_from, right_map_size);
370
371current_offset += right_map_size;
372result_offsets_data[row_idx] = current_offset;
373}
374
375auto nested_column = ColumnArray::create(
376ColumnTuple::create(Columns{std::move(result_keys), std::move(result_values)}),
377std::move(result_offsets));
378
379return ColumnMap::create(nested_column);
380}
381};
382
383}
384
385REGISTER_FUNCTION(Map)
386{
387factory.registerFunction<FunctionMap>();
388factory.registerFunction<FunctionMapUpdate>();
389factory.registerFunction<FunctionMapFromArrays>();
390factory.registerAlias("MAP_FROM_ARRAYS", "mapFromArrays");
391}
392
393}
394