ClickHouse

Форк
0
/
IFunction.cpp 
581 строка · 21.9 Кб
1
#include <Functions/IFunctionAdaptors.h>
2

3
#include <Common/typeid_cast.h>
4
#include <Common/assert_cast.h>
5
#include <Common/SipHash.h>
6
#include <Core/Block.h>
7
#include <Core/TypeId.h>
8
#include <Columns/ColumnConst.h>
9
#include <Columns/ColumnNullable.h>
10
#include <Columns/ColumnTuple.h>
11
#include <Columns/ColumnLowCardinality.h>
12
#include <Columns/ColumnSparse.h>
13
#include <Columns/ColumnNothing.h>
14
#include <DataTypes/DataTypeNothing.h>
15
#include <DataTypes/DataTypeNullable.h>
16
#include <DataTypes/Native.h>
17
#include <DataTypes/DataTypeLowCardinality.h>
18
#include <Functions/FunctionHelpers.h>
19
#include <cstdlib>
20
#include <memory>
21

22
#include "config.h"
23

24
#if USE_EMBEDDED_COMPILER
25
#    include <llvm/IR/IRBuilder.h>
26
#endif
27

28

29
namespace DB
30
{
31

32
namespace ErrorCodes
33
{
34
    extern const int LOGICAL_ERROR;
35
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
36
    extern const int ILLEGAL_COLUMN;
37
}
38

39
namespace
40
{
41

42
bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args)
43
{
44
    for (const auto & arg : args)
45
        if (!isColumnConst(*arg.column))
46
            return false;
47
    return true;
48
}
49

50
/// Replaces the LowCardinality argument (at most one is supported) by its dictionary column
/// and returns the column of indexes into that dictionary.
///
/// - args: modified in place. The LowCardinality argument gets the dictionary as its column and
///   the dictionary type as its type. Every constant argument is stripped of LowCardinality and
///   resized to the number of rows of the dictionary (or to input_rows_count if no
///   LowCardinality column was found).
/// - can_be_executed_on_default_arguments: when false, a minimal dictionary-encoded column is
///   built instead of reusing the column's own dictionary.
///
/// Returns nullptr when there is no LowCardinality column among the arguments.
/// Throws LOGICAL_ERROR if more than one LowCardinality column is found, or if a LowCardinality
/// column is paired with a type that is not DataTypeLowCardinality.
ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
    ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{
    size_t num_rows = input_rows_count;
    ColumnPtr indexes;

    /// Find the LowCardinality column and replace it with its nested dictionary.
    for (auto & column : args)
    {
        const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get());
        if (!low_cardinality_column)
            continue;

        /// Only a single LowCardinality column is supported for now.
        if (indexes)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function.");

        const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());

        if (!low_cardinality_type)
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "Incompatible type for LowCardinality column: {}",
                column.type->getName());

        if (can_be_executed_on_default_arguments)
        {
            /// Normal case, when the function can be executed on the values' default.
            /// Take the indexes BEFORE overwriting column.column: low_cardinality_column points
            /// into the object owned by column.column, so it must not be touched once that
            /// pointer has been reassigned (it may have been the last owner).
            indexes = low_cardinality_column->getIndexesPtr();
            column.column = low_cardinality_column->getDictionary().getNestedColumn();
        }
        else
        {
            /// Special case when the default value can't be used. Example: 1 % LowCardinality(Int).
            /// LowCardinality always contains the default, so 1 % 0 would throw in the normal case.
            auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
            column.column = dict_encoded.dictionary;
            indexes = dict_encoded.indexes;
        }

        num_rows = column.column->size();
        column.type = low_cardinality_type->getDictionaryType();
    }

    /// Change the size of constants so that they match the (possibly replaced) column size.
    for (auto & column : args)
    {
        if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
        {
            column.column = column_const->removeLowCardinality()->cloneResized(num_rows);
            column.type = removeLowCardinality(column.type);
        }
    }

    return indexes;
}
104

105
/// Recursively strips LowCardinality from the column and from the type of every argument, in place.
void convertLowCardinalityColumnsToFull(ColumnsWithTypeAndName & args)
{
    for (auto & argument : args)
    {
        auto full_column = recursiveRemoveLowCardinality(argument.column);
        argument.column = full_column;

        auto full_type = recursiveRemoveLowCardinality(argument.type);
        argument.type = full_type;
    }
}
113

114
}
115

116
/// Default handling of the case when all arguments are constants.
///
/// First verifies that every argument listed by getArgumentsThatAreAlwaysConstant() is really
/// constant (throws ILLEGAL_COLUMN otherwise). Then, if the default implementation for constants
/// is enabled and all arguments are constant, executes the function on one row and wraps the
/// result into a constant column of input_rows_count rows.
///
/// Returns nullptr when this default implementation is not applicable.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when every argument is in the "always constant" set.
ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
    const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
    const ColumnNumbers always_constant_positions = getArgumentsThatAreAlwaysConstant();

    /// Make sure the arguments that are declared as always constant are really constant.
    for (auto position : always_constant_positions)
    {
        if (position < args.size() && !isColumnConst(*args[position].column))
            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                "Argument at index {} for function {} must be constant",
                position,
                getName());
    }

    if (args.empty())
        return nullptr;
    if (!useDefaultImplementationForConstants())
        return nullptr;
    if (!allArgumentsAreConstants(args))
        return nullptr;

    const auto must_remain_constant = [&always_constant_positions](size_t position)
    {
        const auto last = always_constant_positions.end();
        return std::find(always_constant_positions.begin(), last, position) != last;
    };

    const size_t total_arguments = args.size();

    ColumnsWithTypeAndName single_row_args;
    single_row_args.reserve(total_arguments);

    bool unwrapped_any_argument = false;
    for (size_t position = 0; position < total_arguments; ++position)
    {
        const ColumnWithTypeAndName & argument = args[position];

        if (must_remain_constant(position))
        {
            /// Stays a constant column, only shrunk to one row.
            single_row_args.emplace_back(ColumnWithTypeAndName{argument.column->cloneResized(1), argument.type, argument.name});
            continue;
        }

        /// Any other argument is replaced by the data column stored inside the constant.
        unwrapped_any_argument = true;
        single_row_args.emplace_back(ColumnWithTypeAndName{ assert_cast<const ColumnConst *>(argument.column.get())->getDataColumnPtr(), argument.type, argument.name });
    }

    /** When using default implementation for constants, the function requires at least one argument
      *  not in "arguments_to_remain_constants" set. Otherwise we get infinite recursion.
      */
    if (!unwrapped_any_argument)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Number of arguments for function {} doesn't match: the function requires more arguments",
            getName());

    ColumnPtr single_row_result = executeWithoutLowCardinalityColumns(single_row_args, result_type, 1, dry_run);

    /// Extremely rare case: the function has completely constant arguments,
    /// but some of them were produced by a non-deterministic function.
    if (single_row_result->size() > 1)
        single_row_result = single_row_result->cloneResized(1);

    return ColumnConst::create(single_row_result, input_rows_count);
}
169

170

171
/// Default handling of Nullable / NULL arguments.
///
/// - If some argument is a NULL constant, returns a constant column with the default value of
///   result_type (which must be Nullable, otherwise LOGICAL_ERROR is thrown).
/// - If some argument is Nullable, executes the function on the nested columns with the
///   non-Nullable result type and wraps the result back with wrapInNullable.
/// - Otherwise (or when there are no arguments, or the default implementation for Nulls is
///   disabled) returns nullptr.
ColumnPtr IExecutableFunction::defaultImplementationForNulls(
    const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
    if (args.empty() || !useDefaultImplementationForNulls())
        return nullptr;

    const NullPresence presence = getNullPresense(args);

    if (presence.has_null_constant)
    {
        /// The default implementation for Nulls returns a null result for null arguments,
        /// hence the result type has to be Nullable.
        if (!result_type->isNullable())
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "Function {} with Null argument and default implementation for Nulls "
                "is expected to return Nullable result, got {}",
                getName(),
                result_type->getName());

        return result_type->createColumnConstWithDefaultValue(input_rows_count);
    }

    if (!presence.has_nullable)
        return nullptr;

    ColumnsWithTypeAndName nested_args = createBlockWithNestedColumns(args);
    auto nested_result_type = removeNullable(result_type);

    auto nested_result = executeWithoutLowCardinalityColumns(nested_args, nested_result_type, input_rows_count, dry_run);
    return wrapInNullable(nested_result, args, result_type, input_rows_count);
}
205

206
/// Default handling of arguments of type Nothing.
///
/// Returns nullptr when the default implementation for Nothing is disabled or no argument has
/// type Nothing. Otherwise returns an empty ColumnNothing.
/// Throws LOGICAL_ERROR if result_type is not Nothing, and ILLEGAL_COLUMN if input_rows_count is
/// non-zero.
ColumnPtr IExecutableFunction::defaultImplementationForNothing(
    const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const
{
    if (!useDefaultImplementationForNothing())
        return nullptr;

    const bool has_nothing_argument = std::any_of(
        args.begin(),
        args.end(),
        [](const ColumnWithTypeAndName & argument) { return isNothing(argument.type); });

    if (!has_nothing_argument)
        return nullptr;

    if (!isNothing(result_type))
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Function {} with argument with type Nothing and default implementation for Nothing "
            "is expected to return result with type Nothing, got {}",
            getName(),
            result_type->getName());

    if (input_rows_count > 0)
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create non-empty column with type Nothing");

    return ColumnNothing::create(0);
}
231

232
/// Executes the function on the given arguments.
///
/// The default implementations are tried in this order: Nothing, constant arguments, Nulls.
/// The first one that returns a non-null column wins. If none applies, executeDryRunImpl
/// (when dry_run is set) or executeImpl is called.
/// Throws LOGICAL_ERROR if the implementation returns a null column.
ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns(
    const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
    ColumnPtr result = defaultImplementationForNothing(args, result_type, input_rows_count);
    if (result)
        return result;

    result = defaultImplementationForConstantArguments(args, result_type, input_rows_count, dry_run);
    if (result)
        return result;

    result = defaultImplementationForNulls(args, result_type, input_rows_count, dry_run);
    if (result)
        return result;

    result = dry_run
        ? executeDryRunImpl(args, result_type, input_rows_count)
        : executeImpl(args, result_type, input_rows_count);

    if (!result)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column was returned by function {}", getName());

    return result;
}
255

256
/// Replaces the column of every argument by the result of recursiveRemoveSparse, in place.
/// Argument types are left untouched.
static void convertSparseColumnsToFull(ColumnsWithTypeAndName & args)
{
    for (auto & argument : args)
    {
        auto full_column = recursiveRemoveSparse(argument.column);
        argument.column = full_column;
    }
}
261

262
/// Executes the function, applying the default implementation for LowCardinality columns
/// when it is enabled.
///
/// - Default implementation disabled: the arguments are passed through unchanged.
/// - Result type is not LowCardinality: all LowCardinality arguments are converted to full
///   columns and the function is executed on them.
/// - Result type is LowCardinality: the function is executed on the dictionary of the
///   LowCardinality argument, the results are inserted into a new unique dictionary, and a
///   LowCardinality column is assembled from it (wrapped into ColumnConst if the nested result
///   was constant).
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
    if (!useDefaultImplementationForLowCardinalityColumns())
        return executeWithoutLowCardinalityColumns(arguments, result_type, input_rows_count, dry_run);

    ColumnsWithTypeAndName plain_arguments = arguments;

    const auto * low_cardinality_result_type = typeid_cast<const DataTypeLowCardinality *>(result_type.get());
    if (!low_cardinality_result_type)
    {
        convertLowCardinalityColumnsToFull(plain_arguments);
        return executeWithoutLowCardinalityColumns(plain_arguments, result_type, input_rows_count, dry_run);
    }

    const bool execute_on_defaults = canBeExecutedOnDefaultArguments();

    const auto & dictionary_type = low_cardinality_result_type->getDictionaryType();
    ColumnPtr argument_indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
        plain_arguments, execute_on_defaults, input_rows_count);

    /// After the replacement all arguments have the same size, so the first one defines the row count.
    size_t nested_rows_count = input_rows_count;
    if (!plain_arguments.empty())
        nested_rows_count = plain_arguments.front().column->size();

    auto nested_result = executeWithoutLowCardinalityColumns(plain_arguments, dictionary_type, nested_rows_count, dry_run);
    const bool nested_result_is_constant = isColumnConst(*nested_result);

    /// A constant result contributes exactly one key to the dictionary.
    auto keys = nested_result_is_constant
        ? nested_result->cloneResized(1)->convertToFullColumnIfConst()
        : nested_result;

    auto mutable_dictionary = DataTypeLowCardinality::createColumnUnique(*low_cardinality_result_type->getDictionaryType());
    ColumnPtr key_positions = mutable_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
    ColumnUniquePtr result_dictionary = std::move(mutable_dictionary);

    /// Remap through the argument's indexes only when there was a LowCardinality argument
    /// and the nested result is not constant.
    const bool remap_through_argument_indexes = argument_indexes && !nested_result_is_constant;

    ColumnPtr result;
    if (remap_through_argument_indexes)
        result = ColumnLowCardinality::create(result_dictionary, key_positions->index(*argument_indexes, 0));
    else
        result = ColumnLowCardinality::create(result_dictionary, key_positions);

    if (nested_result_is_constant)
        result = ColumnConst::create(std::move(result), input_rows_count);

    return result;
}
311

312
/// Entry point of function execution; applies the default implementation for sparse columns.
///
/// - Default implementation for sparse columns disabled: the arguments are passed through as is.
/// - Result type is Function, or the arguments are not "exactly one sparse column (with default
///   rows) plus constants": sparse columns are converted to full ones before execution.
/// - Otherwise the function is executed only on the values of the sparse column, and the result
///   is either re-wrapped into a sparse column with the same offsets or expanded into a full
///   column.
ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
    const bool sparse_handled_by_default = useDefaultImplementationForSparseColumns();

    /// DataTypeFunction does not support obtaining default (isDefaultAt()),
    /// ColumnFunction does not support getting specific values.
    const bool result_is_function = result_type->getTypeId() == TypeIndex::Function;

    if (!sparse_handled_by_default)
        return executeWithoutSparseColumns(arguments, result_type, input_rows_count, dry_run);

    auto args_without_sparse = arguments;

    const auto execute_on_full_columns = [&]
    {
        convertSparseColumnsToFull(args_without_sparse);
        return executeWithoutSparseColumns(args_without_sparse, result_type, input_rows_count, dry_run);
    };

    if (result_is_function)
        return execute_on_full_columns();

    size_t sparse_count = 0;
    size_t full_count = 0;
    size_t sparse_position = 0;

    for (size_t position = 0; position < arguments.size(); ++position)
    {
        const auto * sparse_candidate = checkAndGetColumn<ColumnSparse>(arguments[position].column.get());

        /// In the rare case when a sparse column doesn't have default values,
        /// it's more convenient to convert it to full before execution of the function.
        if (sparse_candidate && sparse_candidate->getNumberOfDefaultRows() != 0)
        {
            sparse_position = position;
            ++sparse_count;
        }
        else if (!isColumnConst(*arguments[position].column))
        {
            ++full_count;
        }
    }

    if (sparse_count != 1 || full_count != 0)
        return execute_on_full_columns();

    auto & sparse_argument = args_without_sparse[sparse_position];
    ColumnPtr sparse_offsets;
    {
        /// Separate scope: the reference must not be used after the column pointer is replaced.
        const auto & sparse_column = assert_cast<const ColumnSparse &>(*sparse_argument.column);
        sparse_offsets = sparse_column.getOffsetsPtr();
        sparse_argument.column = sparse_column.getValuesPtr();
    }

    /// Resize all the other arguments to the size of the sparse column's values.
    const size_t values_size = sparse_argument.column->size();
    for (size_t position = 0; position < args_without_sparse.size(); ++position)
    {
        if (position != sparse_position)
            args_without_sparse[position].column = args_without_sparse[position].column->cloneResized(values_size);
    }

    auto values_result = executeWithoutSparseColumns(args_without_sparse, result_type, values_size, dry_run);

    if (isColumnConst(*values_result))
        return values_result->cloneResized(input_rows_count);

    /// If the default of the sparse column is changed after execution of the function, convert to full column.
    /// If there is any default in a non-zero position after execution of the function, convert to full column.
    /// Currently there is no easy way to rebuild a sparse column with new offsets.
    const bool must_expand_to_full = !result_type->canBeInsideSparseColumns()
        || !values_result->isDefaultAt(0)
        || values_result->getNumberOfDefaultRows() != 1;

    if (must_expand_to_full)
    {
        const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
        return values_result->createWithOffsets(offsets_data, *createColumnConst(values_result, 0), input_rows_count, /*shift=*/ 1);
    }

    return ColumnSparse::create(values_result, sparse_offsets, input_rows_count);
}
389

390
void IFunctionOverloadResolver::checkNumberOfArguments(size_t number_of_arguments) const
391
{
392
    if (isVariadic())
393
        return;
394

395
    size_t expected_number_of_arguments = getNumberOfArguments();
396

397
    if (number_of_arguments != expected_number_of_arguments)
398
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
399
            "Number of arguments for function {} doesn't match: passed {}, should be {}",
400
            getName(),
401
            number_of_arguments,
402
            expected_number_of_arguments);
403
}
404

405
/// Deduces the return type, taking the default implementation for LowCardinality into account.
///
/// When that default implementation is enabled, the return type is computed on arguments stripped
/// of LowCardinality. It is wrapped into LowCardinality only if all of the following hold:
/// the function can be executed on a LowCardinality dictionary, some argument type is
/// LowCardinality, at most one non-constant argument is LowCardinality, there are no non-constant
/// ordinary arguments, and the computed type can be inside LowCardinality.
DataTypePtr IFunctionOverloadResolver::getReturnType(const ColumnsWithTypeAndName & arguments) const
{
    if (!useDefaultImplementationForLowCardinalityColumns())
        return getReturnTypeWithoutLowCardinality(arguments);

    ColumnsWithTypeAndName plain_arguments(arguments);

    bool seen_low_cardinality = false;
    size_t full_low_cardinality_count = 0;
    size_t full_ordinary_count = 0;

    for (ColumnWithTypeAndName & argument : plain_arguments)
    {
        /// The column may be absent at this stage, so check the pointer first.
        const bool is_constant = argument.column && isColumnConst(*argument.column);
        if (is_constant)
            argument.column = assert_cast<const ColumnConst &>(*argument.column).removeLowCardinality();

        const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(argument.type.get());
        if (!low_cardinality_type)
        {
            if (!is_constant)
                ++full_ordinary_count;
            continue;
        }

        argument.type = low_cardinality_type->getDictionaryType();
        seen_low_cardinality = true;

        if (!is_constant)
            ++full_low_cardinality_count;
    }

    convertLowCardinalityColumnsToFull(plain_arguments);

    auto plain_return_type = getReturnTypeWithoutLowCardinality(plain_arguments);

    const bool wrap_into_low_cardinality = canBeExecutedOnLowCardinalityDictionary()
        && seen_low_cardinality
        && full_low_cardinality_count <= 1
        && full_ordinary_count == 0
        && plain_return_type->canBeInsideLowCardinality();

    if (wrap_into_low_cardinality)
        return std::make_shared<DataTypeLowCardinality>(plain_return_type);

    return plain_return_type;
}
447

448
/// Deduces the return type for the given arguments and builds the function with it.
FunctionBasePtr IFunctionOverloadResolver::build(const ColumnsWithTypeAndName & arguments) const
{
    const auto deduced_return_type = getReturnType(arguments);
    FunctionBasePtr function = buildImpl(arguments, deduced_return_type);
    return function;
}
453

454
/// Validates the number of arguments, then delegates to getLambdaArgumentTypesImpl,
/// which receives `arguments` by non-const reference.
void IFunctionOverloadResolver::getLambdaArgumentTypes(DataTypes & arguments [[maybe_unused]]) const
{
    const size_t passed_count = arguments.size();
    checkNumberOfArguments(passed_count);

    getLambdaArgumentTypesImpl(arguments);
}
459

460
/// Deduces the return type for arguments that carry no LowCardinality wrappers.
///
/// After checking the number of arguments (throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH on mismatch),
/// the default implementations are applied in this order:
/// - Nothing: if enabled and any argument has type Nothing, the result type is Nothing.
/// - Nulls: if enabled and some argument is a NULL constant, the result is Nullable(Nothing);
///   if some argument is Nullable, the result is getReturnTypeImpl on the nested columns,
///   wrapped by makeNullable.
/// Otherwise the result is getReturnTypeImpl on the arguments as they are.
DataTypePtr IFunctionOverloadResolver::getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const
{
    checkNumberOfArguments(arguments.size());

    if (!arguments.empty() && useDefaultImplementationForNothing())
    {
        for (const auto & arg : arguments)
        {
            if (isNothing(arg.type))
                return std::make_shared<DataTypeNothing>();
        }
    }

    if (!arguments.empty() && useDefaultImplementationForNulls())
    {
        NullPresence null_presence = getNullPresense(arguments);

        if (null_presence.has_null_constant)
            return makeNullable(std::make_shared<DataTypeNothing>());

        if (null_presence.has_nullable)
        {
            /// Use the nested columns directly: converting them to a Block and copying them back
            /// into a ColumnsWithTypeAndName only added an extra copy.
            const ColumnsWithTypeAndName nested_columns = createBlockWithNestedColumns(arguments);
            auto return_type = getReturnTypeImpl(nested_columns);
            return makeNullable(return_type);
        }
    }

    return getReturnTypeImpl(arguments);
}
491

492

493
#if USE_EMBEDDED_COMPILER
494

495
static std::optional<DataTypes> removeNullables(const DataTypes & types)
496
{
497
    bool has_nullable = false;
498
    for (const auto & type : types)
499
    {
500
        if (!typeid_cast<const DataTypeNullable *>(type.get()))
501
            continue;
502

503
        has_nullable = true;
504
        break;
505
    }
506

507
    if (has_nullable)
508
    {
509
        DataTypes filtered;
510
        filtered.reserve(types.size());
511

512
        for (const auto & sub_type : types)
513
            filtered.emplace_back(removeNullable(sub_type));
514

515
        return filtered;
516
    }
517

518
    return {};
519
}
520

521
bool IFunction::isCompilable(const DataTypes & arguments, const DataTypePtr & result_type) const
522
{
523
    if (useDefaultImplementationForNulls())
524
        if (auto denulled_arguments = removeNullables(arguments))
525
            return isCompilableImpl(*denulled_arguments, result_type);
526

527
    return isCompilableImpl(arguments, result_type);
528
}
529

530
/// Emits code for the function through the given IR builder.
///
/// When the default implementation for Nulls is enabled and some argument is Nullable:
/// element 0 of every Nullable argument is passed to compileImpl as the value, element 1 is
/// collected as its null flag, and the result is a Nullable structure whose element 0 is the
/// compileImpl result and whose element 1 is the OR of the collected null flags.
/// Otherwise compileImpl is called on the arguments as they are.
llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const ValuesWithType & arguments, const DataTypePtr & result_type) const
{
    DataTypes argument_types;
    argument_types.reserve(arguments.size());
    for (const auto & argument : arguments)
        argument_types.push_back(argument.type);

    auto denulled_argument_types = removeNullables(argument_types);

    if (!useDefaultImplementationForNulls() || !denulled_argument_types)
        return compileImpl(builder, arguments, result_type);

    auto & b = static_cast<llvm::IRBuilder<> &>(builder);

    ValuesWithType nested_arguments;
    nested_arguments.reserve(arguments.size());

    std::vector<llvm::Value*> null_flags;

    for (size_t position = 0; position < arguments.size(); ++position)
    {
        const auto & argument = arguments[position];
        llvm::Value * nested_value = argument.value;

        if (argument.type->isNullable())
        {
            /// Element 0 is the nested value, element 1 is the null flag.
            nested_value = b.CreateExtractValue(argument.value, {0});
            null_flags.emplace_back(b.CreateExtractValue(argument.value, {1}));
        }

        nested_arguments.emplace_back(nested_value, (*denulled_argument_types)[position]);
    }

    auto * nested_result = compileImpl(builder, nested_arguments, removeNullable(result_type));

    /// Start from an all-zero Nullable structure and put the computed value into it.
    auto * nullable_type = toNativeType(b, makeNullable(getReturnTypeImpl(*denulled_argument_types)));
    auto * zero_nullable = llvm::Constant::getNullValue(nullable_type);

    auto * nullable_with_value = b.CreateInsertValue(zero_nullable, nested_result, {0});
    auto * result_is_null = b.CreateExtractValue(nullable_with_value, {1});

    /// The result is NULL if any of the Nullable arguments is NULL.
    for (auto * null_flag : null_flags)
        result_is_null = b.CreateOr(result_is_null, null_flag);

    return b.CreateInsertValue(nullable_with_value, result_is_null, {1});
}
578

579
#endif
580

581
}
582

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.