ClickHouse

Форк
0
/
concat.cpp 
249 строк · 9.8 Кб
1
#include <Columns/ColumnString.h>
2
#include <Columns/ColumnStringHelpers.h>
3
#include <DataTypes/DataTypeString.h>
4
#include <Functions/FunctionFactory.h>
5
#include <Functions/FunctionHelpers.h>
6
#include <Functions/GatherUtils/Algorithms.h>
7
#include <Functions/GatherUtils/Sinks.h>
8
#include <Functions/GatherUtils/Sources.h>
9
#include <Functions/IFunction.h>
10
#include <Functions/formatString.h>
11
#include <IO/WriteHelpers.h>
12
#include <base/map.h>
13

14

15
namespace DB
16
{
17
namespace ErrorCodes
{
/// Only declared here (`extern`); thrown below when a function receives too few arguments.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
21

22
using namespace GatherUtils;
23

24
namespace
25
{
26

27
template <typename Name, bool is_injective>
28
class ConcatImpl : public IFunction
29
{
30
public:
31
    static constexpr auto name = Name::name;
32
    explicit ConcatImpl(ContextPtr context_) : context(context_) { }
33
    static FunctionPtr create(ContextPtr context) { return std::make_shared<ConcatImpl>(context); }
34

35
    String getName() const override { return name; }
36

37
    bool isVariadic() const override { return true; }
38

39
    size_t getNumberOfArguments() const override { return 0; }
40

41
    bool isInjective(const ColumnsWithTypeAndName &) const override { return is_injective; }
42

43
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
44

45
    bool useDefaultImplementationForConstants() const override { return true; }
46

47
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
48
    {
49
        if (arguments.size() < 2)
50
            throw Exception(
51
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
52
                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
53
                getName(),
54
                arguments.size());
55

56
        return std::make_shared<DataTypeString>();
57
    }
58

59
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
60
    {
61
        /// Format function is not proven to be faster for two arguments.
62
        /// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl.
63
        /// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%).
64
        /// For 3 and more arguments FormatStringImpl is much faster (up to 50-60%).
65
        if (arguments.size() == 2)
66
            return executeBinary(arguments, input_rows_count);
67
        return executeFormatImpl(arguments, input_rows_count);
68
    }
69

70
private:
71
    ContextWeakPtr context;
72

73
    ColumnPtr executeBinary(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
74
    {
75
        const IColumn * c0 = arguments[0].column.get();
76
        const IColumn * c1 = arguments[1].column.get();
77

78
        const ColumnString * c0_string = checkAndGetColumn<ColumnString>(c0);
79
        const ColumnString * c1_string = checkAndGetColumn<ColumnString>(c1);
80
        const ColumnConst * c0_const_string = checkAndGetColumnConst<ColumnString>(c0);
81
        const ColumnConst * c1_const_string = checkAndGetColumnConst<ColumnString>(c1);
82

83
        auto col_res = ColumnString::create();
84

85
        if (c0_string && c1_string)
86
            concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
87
        else if (c0_string && c1_const_string)
88
            concat(StringSource(*c0_string), ConstSource<StringSource>(*c1_const_string), StringSink(*col_res, c0->size()));
89
        else if (c0_const_string && c1_string)
90
            concat(ConstSource<StringSource>(*c0_const_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
91
        else
92
        {
93
            /// Fallback: use generic implementation for not very important cases.
94
            return executeFormatImpl(arguments, input_rows_count);
95
        }
96

97
        return col_res;
98
    }
99

100
    ColumnPtr executeFormatImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
101
    {
102
        const size_t num_arguments = arguments.size();
103
        assert(num_arguments >= 2);
104

105
        auto col_res = ColumnString::create();
106
        std::vector<const ColumnString::Chars *> data(num_arguments);
107
        std::vector<const ColumnString::Offsets *> offsets(num_arguments);
108
        std::vector<size_t> fixed_string_sizes(num_arguments);
109
        std::vector<std::optional<String>> constant_strings(num_arguments);
110
        std::vector<ColumnString::MutablePtr> converted_col_ptrs(num_arguments);
111
        bool has_column_string = false;
112
        bool has_column_fixed_string = false;
113
        for (size_t i = 0; i < num_arguments; ++i)
114
        {
115
            const ColumnPtr & column = arguments[i].column;
116
            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
117
            {
118
                has_column_string = true;
119
                data[i] = &col->getChars();
120
                offsets[i] = &col->getOffsets();
121
            }
122
            else if (const ColumnFixedString * fixed_col = checkAndGetColumn<ColumnFixedString>(column.get()))
123
            {
124
                has_column_fixed_string = true;
125
                data[i] = &fixed_col->getChars();
126
                fixed_string_sizes[i] = fixed_col->getN();
127
            }
128
            else if (const ColumnConst * const_col = checkAndGetColumnConstStringOrFixedString(column.get()))
129
            {
130
                constant_strings[i] = const_col->getValue<String>();
131
            }
132
            else
133
            {
134
                /// A non-String/non-FixedString-type argument: use the default serialization to convert it to String
135
                auto full_column = column->convertToFullIfNeeded();
136
                auto serialization = arguments[i].type->getDefaultSerialization();
137
                auto converted_col_str = ColumnString::create();
138
                ColumnStringHelpers::WriteHelper write_helper(*converted_col_str, column->size());
139
                auto & write_buffer = write_helper.getWriteBuffer();
140
                FormatSettings format_settings;
141
                for (size_t row = 0; row < column->size(); ++row)
142
                {
143
                    serialization->serializeText(*full_column, row, write_buffer, format_settings);
144
                    write_helper.rowWritten();
145
                }
146
                write_helper.finalize();
147

148
                /// Keep the pointer alive
149
                converted_col_ptrs[i] = std::move(converted_col_str);
150

151
                /// Same as the normal `ColumnString` branch
152
                has_column_string = true;
153
                data[i] = &converted_col_ptrs[i]->getChars();
154
                offsets[i] = &converted_col_ptrs[i]->getOffsets();
155
            }
156
        }
157

158
        String pattern;
159
        pattern.reserve(2 * num_arguments);
160

161
        for (size_t i = 0; i < num_arguments; ++i)
162
            pattern += "{}";
163

164
        FormatStringImpl::formatExecute(
165
            has_column_string,
166
            has_column_fixed_string,
167
            std::move(pattern),
168
            data,
169
            offsets,
170
            fixed_string_sizes,
171
            constant_strings,
172
            col_res->getChars(),
173
            col_res->getOffsets(),
174
            input_rows_count);
175

176
        return col_res;
177
    }
178
};
179

180

181
/// Name tag carrying the function name "concat"; used as the `Name` parameter of ConcatImpl.
struct NameConcat
{
    static constexpr auto name = "concat";
};
185
/// Name tag carrying the function name "concatAssumeInjective"; used as the `Name` parameter of ConcatImpl.
struct NameConcatAssumeInjective
{
    static constexpr auto name = "concatAssumeInjective";
};
189

190
/// `concat`: ConcatImpl instantiated with is_injective = false.
using FunctionConcat = ConcatImpl<NameConcat, false>;
/// `concatAssumeInjective`: the same implementation instantiated with is_injective = true.
using FunctionConcatAssumeInjective = ConcatImpl<NameConcatAssumeInjective, true>;
192

193

194
/// Works with arrays via `arrayConcat`, maps via `mapConcat`, and tuples via `tupleConcat`.
195
/// Additionally, allows concatenation of arbitrary types that can be cast to string using the corresponding default serialization.
196
class ConcatOverloadResolver : public IFunctionOverloadResolver
197
{
198
public:
199
    static constexpr auto name = "concat";
200
    static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<ConcatOverloadResolver>(context); }
201

202
    explicit ConcatOverloadResolver(ContextPtr context_) : context(context_) { }
203

204
    String getName() const override { return name; }
205
    size_t getNumberOfArguments() const override { return 0; }
206
    bool isVariadic() const override { return true; }
207

208
    FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
209
    {
210
        if (arguments.size() == 1)
211
            return FunctionFactory::instance().getImpl("toString", context)->build(arguments);
212
        if (std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
213
            return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
214
        if (std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
215
            return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
216
        if (std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
217
            return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments);
218
        return std::make_unique<FunctionToFunctionBaseAdaptor>(
219
            FunctionConcat::create(context),
220
            collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
221
            return_type);
222
    }
223

224
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
225
    {
226
        if (arguments.empty())
227
            throw Exception(
228
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
229
                "Number of arguments for function {} doesn't match: passed {}, should be at least 1.",
230
                getName(),
231
                arguments.size());
232

233
        /// We always return Strings from concat, even if arguments were fixed strings.
234
        return std::make_shared<DataTypeString>();
235
    }
236

237
private:
238
    ContextPtr context;
239
};
240

241
}
242

243
/// Registers both functions in the factory:
///  - ConcatOverloadResolver, with the FunctionFactory::CaseInsensitive flag;
///  - FunctionConcatAssumeInjective, with the factory's default registration arguments.
REGISTER_FUNCTION(Concat)
{
    factory.registerFunction<ConcatOverloadResolver>({}, FunctionFactory::CaseInsensitive);
    factory.registerFunction<FunctionConcatAssumeInjective>();
}
248

249
}
250

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.