// Source: ClickHouse (size reported by the code viewer: 581 lines, 21.9 KB)
1#include <Functions/IFunctionAdaptors.h>
2
3#include <Common/typeid_cast.h>
4#include <Common/assert_cast.h>
5#include <Common/SipHash.h>
6#include <Core/Block.h>
7#include <Core/TypeId.h>
8#include <Columns/ColumnConst.h>
9#include <Columns/ColumnNullable.h>
10#include <Columns/ColumnTuple.h>
11#include <Columns/ColumnLowCardinality.h>
12#include <Columns/ColumnSparse.h>
13#include <Columns/ColumnNothing.h>
14#include <DataTypes/DataTypeNothing.h>
15#include <DataTypes/DataTypeNullable.h>
16#include <DataTypes/Native.h>
17#include <DataTypes/DataTypeLowCardinality.h>
18#include <Functions/FunctionHelpers.h>
19#include <cstdlib>
20#include <memory>
21
22#include "config.h"
23
24#if USE_EMBEDDED_COMPILER
25# include <llvm/IR/IRBuilder.h>
26#endif
27
28
29namespace DB
30{
31
32namespace ErrorCodes
33{
34extern const int LOGICAL_ERROR;
35extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
36extern const int ILLEGAL_COLUMN;
37}
38
39namespace
40{
41
42bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
43{
44for (const auto & arg : args)
45if (!isColumnConst(*arg.column))
46return false;
47return true;
48}
49
50ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
51ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
52{
53size_t num_rows = input_rows_count;
54ColumnPtr indexes;
55
56/// Find first LowCardinality column and replace it to nested dictionary.
57for (auto & column : args)
58{
59if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
60{
61/// Single LowCardinality column is supported now.
62if (indexes)
63throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");
64
65const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
66
67if (!low_cardinality_type)
68throw Exception(ErrorCodes::LOGICAL_ERROR,
69"Incompatible type for LowCardinality column: {}",
70column.type->getName());
71
72if (can_be_executed_on_default_arguments)
73{
74/// Normal case, when function can be executed on values' default.
75column.column = low_cardinality_column->getDictionary().getNestedColumn();
76indexes = low_cardinality_column->getIndexesPtr();
77}
78else
79{
80/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
81/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
82auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
83column.column = dict_encoded.dictionary;
84indexes = dict_encoded.indexes;
85}
86
87num_rows = column.column->size();
88column.type = low_cardinality_type->getDictionaryType();
89}
90}
91
92/// Change size of constants.
93for (auto & column : args)
94{
95if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
96{
97column.column = column_const->removeLowCardinality()->cloneResized(num_rows);
98column.type = removeLowCardinality(column.type);
99}
100}
101
102return indexes;
103}
104
105void convertLowCardinalityColumnsToFull(ColumnsWithTypeAndName & args)
106{
107for (auto & column : args)
108{
109column.column = recursiveRemoveLowCardinality(column.column);
110column.type = recursiveRemoveLowCardinality(column.type);
111}
112}
113
114}
115
116ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
117const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
118{
119ColumnNumbers arguments_to_remain_constants = getArgumentsThatAreAlwaysConstant();
120
121/// Check that these arguments are really constant.
122for (auto arg_num : arguments_to_remain_constants)
123if (arg_num < args.size() && !isColumnConst(*args[arg_num].column))
124throw Exception(ErrorCodes::ILLEGAL_COLUMN,
125"Argument at index {} for function {} must be constant",
126arg_num,
127getName());
128
129if (args.empty() || !useDefaultImplementationForConstants() || !allArgumentsAreConstants(args))
130return nullptr;
131
132ColumnsWithTypeAndName temporary_columns;
133bool have_converted_columns = false;
134
135size_t arguments_size = args.size();
136temporary_columns.reserve(arguments_size);
137for (size_t arg_num = 0; arg_num < arguments_size; ++arg_num)
138{
139const ColumnWithTypeAndName & column = args[arg_num];
140
141if (arguments_to_remain_constants.end() != std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
142{
143temporary_columns.emplace_back(ColumnWithTypeAndName{column.column->cloneResized(1), column.type, column.name});
144}
145else
146{
147have_converted_columns = true;
148temporary_columns.emplace_back(ColumnWithTypeAndName{ assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name });
149}
150}
151
152/** When using default implementation for constants, the function requires at least one argument
153* not in "arguments_to_remain_constants" set. Otherwise we get infinite recursion.
154*/
155if (!have_converted_columns)
156throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
157"Number of arguments for function {} doesn't match: the function requires more arguments",
158getName());
159
160ColumnPtr result_column = executeWithoutLowCardinalityColumns(temporary_columns, result_type, 1, dry_run);
161
162/// extremely rare case, when we have function with completely const arguments
163/// but some of them produced by non isDeterministic function
164if (result_column->size() > 1)
165result_column = result_column->cloneResized(1);
166
167return ColumnConst::create(result_column, input_rows_count);
168}
169
170
171ColumnPtr IExecutableFunction::defaultImplementationForNulls(
172const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
173{
174if (args.empty() || !useDefaultImplementationForNulls())
175return nullptr;
176
177NullPresence null_presence = getNullPresense(args);
178
179if (null_presence.has_null_constant)
180{
181// Default implementation for nulls returns null result for null arguments,
182// so the result type must be nullable.
183if (!result_type->isNullable())
184throw Exception(
185ErrorCodes::LOGICAL_ERROR,
186"Function {} with Null argument and default implementation for Nulls "
187"is expected to return Nullable result, got {}",
188getName(),
189result_type->getName());
190
191return result_type->createColumnConstWithDefaultValue(input_rows_count);
192}
193
194if (null_presence.has_nullable)
195{
196ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
197auto temporary_result_type = removeNullable(result_type);
198
199auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
200return wrapInNullable(res, args, result_type, input_rows_count);
201}
202
203return nullptr;
204}
205
206ColumnPtr IExecutableFunction::defaultImplementationForNothing(
207const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const
208{
209if (!useDefaultImplementationForNothing())
210return nullptr;
211
212bool is_nothing_type_presented = false;
213for (const auto & arg : args)
214is_nothing_type_presented |= isNothing(arg.type);
215
216if (!is_nothing_type_presented)
217return nullptr;
218
219if (!isNothing(result_type))
220throw Exception(
221ErrorCodes::LOGICAL_ERROR,
222"Function {} with argument with type Nothing and default implementation for Nothing "
223"is expected to return result with type Nothing, got {}",
224getName(),
225result_type->getName());
226
227if (input_rows_count > 0)
228throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create non-empty column with type Nothing");
229return ColumnNothing::create(0);
230}
231
232ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns(
233const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
234{
235if (auto res = defaultImplementationForNothing(args, result_type, input_rows_count))
236return res;
237
238if (auto res = defaultImplementationForConstantArguments(args, result_type, input_rows_count, dry_run))
239return res;
240
241if (auto res = defaultImplementationForNulls(args, result_type, input_rows_count, dry_run))
242return res;
243
244ColumnPtr res;
245if (dry_run)
246res = executeDryRunImpl(args, result_type, input_rows_count);
247else
248res = executeImpl(args, result_type, input_rows_count);
249
250if (!res)
251throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column was returned by function {}", getName());
252
253return res;
254}
255
256static void convertSparseColumnsToFull(ColumnsWithTypeAndName & args)
257{
258for (auto & column : args)
259column.column = recursiveRemoveSparse(column.column);
260}
261
262ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
263{
264ColumnPtr result;
265if (useDefaultImplementationForLowCardinalityColumns())
266{
267ColumnsWithTypeAndName columns_without_low_cardinality = arguments;
268
269if (const auto * res_low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(result_type.get()))
270{
271bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
272
273const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
274ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
275columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
276
277size_t new_input_rows_count = columns_without_low_cardinality.empty()
278? input_rows_count
279: columns_without_low_cardinality.front().column->size();
280
281auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
282bool res_is_constant = isColumnConst(*res);
283
284auto keys = res_is_constant
285? res->cloneResized(1)->convertToFullColumnIfConst()
286: res;
287
288auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
289ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
290ColumnUniquePtr res_dictionary = std::move(res_mut_dictionary);
291
292if (indexes && !res_is_constant)
293result = ColumnLowCardinality::create(res_dictionary, res_indexes->index(*indexes, 0));
294else
295result = ColumnLowCardinality::create(res_dictionary, res_indexes);
296
297if (res_is_constant)
298result = ColumnConst::create(std::move(result), input_rows_count);
299}
300else
301{
302convertLowCardinalityColumnsToFull(columns_without_low_cardinality);
303result = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, result_type, input_rows_count, dry_run);
304}
305}
306else
307result = executeWithoutLowCardinalityColumns(arguments, result_type, input_rows_count, dry_run);
308
309return result;
310}
311
312ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
313{
314bool use_default_implementation_for_sparse_columns = useDefaultImplementationForSparseColumns();
315/// DataTypeFunction does not support obtaining default (isDefaultAt())
316/// ColumnFunction does not support getting specific values.
317if (result_type->getTypeId() != TypeIndex::Function && use_default_implementation_for_sparse_columns)
318{
319size_t num_sparse_columns = 0;
320size_t num_full_columns = 0;
321size_t sparse_column_position = 0;
322
323for (size_t i = 0; i < arguments.size(); ++i)
324{
325const auto * column_sparse = checkAndGetColumn<ColumnSparse>(arguments[i].column.get());
326/// In rare case, when sparse column doesn't have default values,
327/// it's more convenient to convert it to full before execution of function.
328if (column_sparse && column_sparse->getNumberOfDefaultRows())
329{
330sparse_column_position = i;
331++num_sparse_columns;
332}
333else if (!isColumnConst(*arguments[i].column))
334{
335++num_full_columns;
336}
337}
338
339auto columns_without_sparse = arguments;
340if (num_sparse_columns == 1 && num_full_columns == 0)
341{
342auto & arg_with_sparse = columns_without_sparse[sparse_column_position];
343ColumnPtr sparse_offsets;
344{
345/// New scope to avoid possible mistakes on dangling reference.
346const auto & column_sparse = assert_cast<const ColumnSparse &>(*arg_with_sparse.column);
347sparse_offsets = column_sparse.getOffsetsPtr();
348arg_with_sparse.column = column_sparse.getValuesPtr();
349}
350
351size_t values_size = arg_with_sparse.column->size();
352for (size_t i = 0; i < columns_without_sparse.size(); ++i)
353{
354if (i == sparse_column_position)
355continue;
356
357columns_without_sparse[i].column = columns_without_sparse[i].column->cloneResized(values_size);
358}
359
360auto res = executeWithoutSparseColumns(columns_without_sparse, result_type, values_size, dry_run);
361
362if (isColumnConst(*res))
363return res->cloneResized(input_rows_count);
364
365/// If default of sparse column is changed after execution of function, convert to full column.
366/// If there are any default in non-zero position after execution of function, convert to full column.
367/// Currently there is no easy way to rebuild sparse column with new offsets.
368if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
369{
370const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
371return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/ 1);
372}
373
374return ColumnSparse::create(res, sparse_offsets, input_rows_count);
375}
376
377convertSparseColumnsToFull(columns_without_sparse);
378return executeWithoutSparseColumns(columns_without_sparse, result_type, input_rows_count, dry_run);
379}
380else if (use_default_implementation_for_sparse_columns)
381{
382auto columns_without_sparse = arguments;
383convertSparseColumnsToFull(columns_without_sparse);
384return executeWithoutSparseColumns(columns_without_sparse, result_type, input_rows_count, dry_run);
385}
386else
387return executeWithoutSparseColumns(arguments, result_type, input_rows_count, dry_run);
388}
389
390void IFunctionOverloadResolver::checkNumberOfArguments(size_t number_of_arguments) const
391{
392if (isVariadic())
393return;
394
395size_t expected_number_of_arguments = getNumberOfArguments();
396
397if (number_of_arguments != expected_number_of_arguments)
398throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
399"Number of arguments for function {} doesn't match: passed {}, should be {}",
400getName(),
401number_of_arguments,
402expected_number_of_arguments);
403}
404
405DataTypePtr IFunctionOverloadResolver::getReturnType(const ColumnsWithTypeAndName & arguments) const
406{
407if (useDefaultImplementationForLowCardinalityColumns())
408{
409bool has_low_cardinality = false;
410size_t num_full_low_cardinality_columns = 0;
411size_t num_full_ordinary_columns = 0;
412
413ColumnsWithTypeAndName args_without_low_cardinality(arguments);
414
415for (ColumnWithTypeAndName & arg : args_without_low_cardinality)
416{
417bool is_const = arg.column && isColumnConst(*arg.column);
418if (is_const)
419arg.column = assert_cast<const ColumnConst &>(*arg.column).removeLowCardinality();
420
421if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(arg.type.get()))
422{
423arg.type = low_cardinality_type->getDictionaryType();
424has_low_cardinality = true;
425
426if (!is_const)
427++num_full_low_cardinality_columns;
428}
429else if (!is_const)
430++num_full_ordinary_columns;
431}
432
433convertLowCardinalityColumnsToFull(args_without_low_cardinality);
434
435auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality);
436
437if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality
438&& num_full_low_cardinality_columns <= 1 && num_full_ordinary_columns == 0
439&& type_without_low_cardinality->canBeInsideLowCardinality())
440return std::make_shared<DataTypeLowCardinality>(type_without_low_cardinality);
441else
442return type_without_low_cardinality;
443}
444
445return getReturnTypeWithoutLowCardinality(arguments);
446}
447
448FunctionBasePtr IFunctionOverloadResolver::build(const ColumnsWithTypeAndName & arguments) const
449{
450auto return_type = getReturnType(arguments);
451return buildImpl(arguments, return_type);
452}
453
454void IFunctionOverloadResolver::getLambdaArgumentTypes(DataTypes & arguments [[maybe_unused]]) const
455{
456checkNumberOfArguments(arguments.size());
457return getLambdaArgumentTypesImpl(arguments);
458}
459
460DataTypePtr IFunctionOverloadResolver::getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const
461{
462checkNumberOfArguments(arguments.size());
463
464if (!arguments.empty() && useDefaultImplementationForNothing())
465{
466for (const auto & arg : arguments)
467{
468if (isNothing(arg.type))
469return std::make_shared<DataTypeNothing>();
470}
471}
472
473if (!arguments.empty() && useDefaultImplementationForNulls())
474{
475NullPresence null_presence = getNullPresense(arguments);
476
477if (null_presence.has_null_constant)
478{
479return makeNullable(std::make_shared<DataTypeNothing>());
480}
481if (null_presence.has_nullable)
482{
483Block nested_columns = createBlockWithNestedColumns(arguments);
484auto return_type = getReturnTypeImpl(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end()));
485return makeNullable(return_type);
486}
487}
488
489return getReturnTypeImpl(arguments);
490}
491
492
493#if USE_EMBEDDED_COMPILER
494
495static std::optional<DataTypes> removeNullables(const DataTypes & types)
496{
497bool has_nullable = false;
498for (const auto & type : types)
499{
500if (!typeid_cast<const DataTypeNullable *>(type.get()))
501continue;
502
503has_nullable = true;
504break;
505}
506
507if (has_nullable)
508{
509DataTypes filtered;
510filtered.reserve(types.size());
511
512for (const auto & sub_type : types)
513filtered.emplace_back(removeNullable(sub_type));
514
515return filtered;
516}
517
518return {};
519}
520
521bool IFunction::isCompilable(const DataTypes & arguments, const DataTypePtr & result_type) const
522{
523if (useDefaultImplementationForNulls())
524if (auto denulled_arguments = removeNullables(arguments))
525return isCompilableImpl(*denulled_arguments, result_type);
526
527return isCompilableImpl(arguments, result_type);
528}
529
530llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const ValuesWithType & arguments, const DataTypePtr & result_type) const
531{
532DataTypes arguments_types;
533arguments_types.reserve(arguments.size());
534
535for (const auto & argument : arguments)
536arguments_types.push_back(argument.type);
537
538auto denulled_arguments_types = removeNullables(arguments_types);
539if (useDefaultImplementationForNulls() && denulled_arguments_types)
540{
541auto & b = static_cast<llvm::IRBuilder<> &>(builder);
542
543ValuesWithType unwrapped_arguments;
544unwrapped_arguments.reserve(arguments.size());
545
546std::vector<llvm::Value*> is_null_values;
547
548for (size_t i = 0; i < arguments.size(); ++i)
549{
550const auto & argument = arguments[i];
551llvm::Value * unwrapped_value = argument.value;
552
553if (argument.type->isNullable())
554{
555unwrapped_value = b.CreateExtractValue(argument.value, {0});
556is_null_values.emplace_back(b.CreateExtractValue(argument.value, {1}));
557}
558
559unwrapped_arguments.emplace_back(unwrapped_value, (*denulled_arguments_types)[i]);
560}
561
562auto * result = compileImpl(builder, unwrapped_arguments, removeNullable(result_type));
563
564auto * nullable_structure_type = toNativeType(b, makeNullable(getReturnTypeImpl(*denulled_arguments_types)));
565auto * nullable_structure_value = llvm::Constant::getNullValue(nullable_structure_type);
566
567auto * nullable_structure_with_result_value = b.CreateInsertValue(nullable_structure_value, result, {0});
568auto * nullable_structure_result_null = b.CreateExtractValue(nullable_structure_with_result_value, {1});
569
570for (auto * is_null_value : is_null_values)
571nullable_structure_result_null = b.CreateOr(nullable_structure_result_null, is_null_value);
572
573return b.CreateInsertValue(nullable_structure_with_result_value, nullable_structure_result_null, {1});
574}
575
576return compileImpl(builder, arguments, result_type);
577}
578
579#endif
580
581}
582