ClickHouse
156 строк · 6.1 Кб
1#include <memory>
2#include <Columns/ColumnString.h>
3#include <DataTypes/DataTypeString.h>
4#include <Formats/FormatFactory.h>
5#include <Functions/FunctionFactory.h>
6#include <Functions/FunctionHelpers.h>
7#include <Functions/IFunction.h>
8#include <IO/WriteBufferFromVector.h>
9#include <IO/WriteHelpers.h>
10#include <Processors/Formats/IOutputFormat.h>
11#include <Processors/Formats/IRowOutputFormat.h>
12#include <base/map.h>
13
14
15namespace DB
16{
/// Error codes thrown by the code in this file. They are only declared here (`extern`);
/// the definitions are not part of this file.
17namespace ErrorCodes
18{
19extern const int ILLEGAL_TYPE_OF_ARGUMENT; /// first argument is not a constant string column
20extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; /// fewer than two arguments were passed
21extern const int BAD_ARGUMENTS; /// the requested format is not a row output format
22}
23
24namespace
25{
26
27/** formatRow(<format>, x, y, ...) is a function that allows you to use RowOutputFormat over
28* several columns to generate a string per row, such as CSV, TSV, JSONEachRow, etc.
29* formatRowNoNewline(...) trims the newline character of each row.
30*/
31template <bool no_newline>
32class FunctionFormatRow : public IFunction
33{
34public:
35static constexpr auto name = no_newline ? "formatRowNoNewline" : "formatRow";
36
37FunctionFormatRow(String format_name_, Names arguments_column_names_, ContextPtr context_)
38: format_name(std::move(format_name_))
39, arguments_column_names(std::move(arguments_column_names_))
40, context(std::move(context_))
41{
42FormatFactory::instance().checkFormatName(format_name);
43}
44
45String getName() const override { return name; }
46size_t getNumberOfArguments() const override { return 0; }
47bool useDefaultImplementationForNulls() const override { return false; }
48bool useDefaultImplementationForConstants() const override { return true; }
49bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
50ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
51
52ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
53{
54auto col_str = ColumnString::create();
55ColumnString::Chars & vec = col_str->getChars();
56WriteBufferFromVector buffer(vec);
57ColumnString::Offsets & offsets = col_str->getOffsets();
58offsets.resize(input_rows_count);
59
60Block arg_columns;
61
62size_t arguments_size = arguments.size();
63for (size_t i = 1; i < arguments_size; ++i)
64{
65auto argument_column = arguments[i];
66argument_column.name = arguments_column_names[i];
67arg_columns.insert(std::move(argument_column));
68}
69
70materializeBlockInplace(arg_columns);
71auto format_settings = getFormatSettings(context);
72auto out = FormatFactory::instance().getOutputFormat(format_name, buffer, arg_columns, context, format_settings);
73
74/// This function make sense only for row output formats.
75auto * row_output_format = dynamic_cast<IRowOutputFormat *>(out.get());
76if (!row_output_format)
77throw Exception(ErrorCodes::BAD_ARGUMENTS,
78"Cannot turn rows into a {} format strings. {} function supports only row output formats",
79format_name, getName());
80
81auto columns = arg_columns.getColumns();
82for (size_t i = 0; i != input_rows_count; ++i)
83{
84row_output_format->writePrefixIfNeeded();
85row_output_format->writeRow(columns, i);
86row_output_format->finalize();
87if constexpr (no_newline)
88{
89// replace '\n' with '\0'
90if (buffer.position() != buffer.buffer().begin() && buffer.position()[-1] == '\n')
91buffer.position()[-1] = '\0';
92}
93else
94writeChar('\0', buffer);
95
96offsets[i] = buffer.count();
97row_output_format->resetFormatter();
98}
99
100return col_str;
101}
102
103private:
104String format_name;
105Names arguments_column_names;
106ContextPtr context;
107};
108
109template <bool no_newline>
110class FormatRowOverloadResolver : public IFunctionOverloadResolver
111{
112public:
113static constexpr auto name = no_newline ? "formatRowNoNewline" : "formatRow";
114static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<FormatRowOverloadResolver>(context); }
115explicit FormatRowOverloadResolver(ContextPtr context_) : context(context_) { }
116String getName() const override { return name; }
117bool isVariadic() const override { return true; }
118size_t getNumberOfArguments() const override { return 0; }
119ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
120bool useDefaultImplementationForNulls() const override { return false; }
121
122FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
123{
124if (arguments.size() < 2)
125throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
126"Function {} requires at least two arguments: the format name and its output expression(s)", getName());
127
128Names arguments_column_names;
129arguments_column_names.reserve(arguments.size());
130for (const auto & argument : arguments)
131arguments_column_names.push_back(argument.name);
132
133if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments.at(0).column.get()))
134return std::make_unique<FunctionToFunctionBaseAdaptor>(
135std::make_shared<FunctionFormatRow<no_newline>>(name_col->getValue<String>(), std::move(arguments_column_names), context),
136collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
137return_type);
138else
139throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument to {} must be a format name", getName());
140}
141
142DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared<DataTypeString>(); }
143
144private:
145ContextPtr context;
146};
147
148}
149
150REGISTER_FUNCTION(FormatRow)
151{
152factory.registerFunction<FormatRowOverloadResolver<true>>();
153factory.registerFunction<FormatRowOverloadResolver<false>>();
154}
155
156}
157