ClickHouse

Форк
0
338 строк · 13.2 Кб
1
#include <functional>
2
#include <iostream>
3
#include <string_view>
4
#include <boost/program_options.hpp>
5

6
#include <IO/copyData.h>
7
#include <IO/ReadBufferFromFileDescriptor.h>
8
#include <IO/ReadHelpers.h>
9
#include <IO/WriteBufferFromFileDescriptor.h>
10
#include <IO/WriteBufferFromOStream.h>
11
#include <Interpreters/registerInterpreters.h>
12
#include <Parsers/ASTInsertQuery.h>
13
#include <Parsers/ParserQuery.h>
14
#include <Parsers/formatAST.h>
15
#include <Parsers/obfuscateQueries.h>
16
#include <Parsers/parseQuery.h>
17
#include <Common/ErrorCodes.h>
18
#include <Common/StringUtils/StringUtils.h>
19
#include <Common/TerminalSize.h>
20
#include <Core/BaseSettingsProgramOptions.h>
21

22
#include <Interpreters/Context.h>
23
#include <Functions/FunctionFactory.h>
24
#include <Databases/registerDatabases.h>
25
#include <Functions/registerFunctions.h>
26
#include <AggregateFunctions/AggregateFunctionFactory.h>
27
#include <AggregateFunctions/registerAggregateFunctions.h>
28
#include <TableFunctions/TableFunctionFactory.h>
29
#include <TableFunctions/registerTableFunctions.h>
30
#include <Storages/StorageFactory.h>
31
#include <Storages/registerStorages.h>
32
#include <Storages/MergeTree/MergeTreeSettings.h>
33
#include <DataTypes/DataTypeFactory.h>
34
#include <Formats/FormatFactory.h>
35
#include <Formats/registerFormats.h>
36
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
37

38

39
/// Error codes used by this file. Only declared here (extern); the definition is not in this file.
/// NOT_IMPLEMENTED is thrown when an INSERT query carries inline data in a format other than Values.
namespace DB::ErrorCodes
40
{
41
    extern const int NOT_IMPLEMENTED;
42
}
43

44
namespace
{

/// Advances `pos` past every run of whitespace and every `--` line comment that precedes the next query.
///
/// `pos` is moved in place; `end` is one past the last character of the input.
/// On return `pos` either equals `end` or points at the first character that is neither whitespace
/// nor the start of a `--` comment. This lets the caller stop cleanly after the last query instead of
/// handing trailing whitespace or comments to the parser.
///
/// When `print_comments` is true, each skipped comment (without its terminating newline) is written
/// to std::cout followed by "\n".
void skipSpacesAndComments(const char*& pos, const char* end, bool print_comments)
{
    while (pos != end)
    {
        /// Go through unsigned char: calling std::isspace with a negative value (any non-ASCII byte
        /// on platforms where `char` is signed) is undefined behaviour.
        while (pos != end && std::isspace(static_cast<unsigned char>(*pos)))
            ++pos;

        /// At least two characters are required, so a bare "--" at the very end of the input
        /// is still recognised as a comment.
        const bool at_comment = end - pos >= 2 && pos[0] == '-' && pos[1] == '-';
        if (!at_comment)
            break; /// Whatever follows is the beginning of the next query.

        const char * comment_begin = pos;
        pos += 2;

        /// The comment lasts until the end of the line; the newline itself is consumed as whitespace
        /// on the next iteration.
        while (pos != end && *pos != '\n')
            ++pos;

        if (print_comments)
            std::cout << std::string_view(comment_begin, pos - comment_begin) << "\n";
    }
}

}
73

74
/// Tell clang to ignore the unused-function and missing-declarations warnings from this point on.
#pragma clang diagnostic ignored "-Wunused-function"
75
#pragma clang diagnostic ignored "-Wmissing-declarations"
76

77
/// Declared here, defined outside this file. Iterated in obfuscate mode as an array terminated by a
/// null entry; each element is a time zone name such as Europe/Amsterdam.
extern const char * auto_time_zones[];
78

79
int mainEntryClickHouseFormat(int argc, char ** argv)
80
{
81
    using namespace DB;
82

83
    try
84
    {
85
        boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
86
        desc.add_options()
87
            ("query", po::value<std::string>(), "query to format")
88
            ("help,h", "produce help message")
89
            ("comments", "keep comments in the output")
90
            ("hilite", "add syntax highlight with ANSI terminal escape sequences")
91
            ("oneline", "format in single line")
92
            ("max_line_length", po::value<size_t>()->default_value(0), "format in single line queries with length less than specified")
93
            ("quiet,q", "just check syntax, no output on success")
94
            ("multiquery,n", "allow multiple queries in the same file")
95
            ("obfuscate", "obfuscate instead of formatting")
96
            ("backslash", "add a backslash at the end of each line of the formatted query")
97
            ("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
98
            ("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
99
        ;
100

101
        Settings cmd_settings;
102
        for (const auto & field : cmd_settings.all())
103
        {
104
            std::string_view name = field.getName();
105
            if (name == "max_parser_depth" || name == "max_query_size")
106
                addProgramOption(cmd_settings, desc, name, field);
107
        }
108

109
        boost::program_options::variables_map options;
110
        boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
111
        po::notify(options);
112

113
        if (options.count("help"))
114
        {
115
            std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;
116
            std::cout << desc << std::endl;
117
            return 1;
118
        }
119

120
        bool hilite = options.count("hilite");
121
        bool oneline = options.count("oneline");
122
        bool quiet = options.count("quiet");
123
        bool multiple = options.count("multiquery");
124
        bool print_comments = options.count("comments");
125
        size_t max_line_length = options["max_line_length"].as<size_t>();
126
        bool obfuscate = options.count("obfuscate");
127
        bool backslash = options.count("backslash");
128
        bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");
129

130
        if (quiet && (hilite || oneline || obfuscate))
131
        {
132
            std::cerr << "Options 'hilite' or 'oneline' or 'obfuscate' have no sense in 'quiet' mode." << std::endl;
133
            return 2;
134
        }
135

136
        if (obfuscate && (hilite || oneline || quiet))
137
        {
138
            std::cerr << "Options 'hilite' or 'oneline' or 'quiet' have no sense in 'obfuscate' mode." << std::endl;
139
            return 2;
140
        }
141

142
        if (oneline && max_line_length)
143
        {
144
            std::cerr << "Options 'oneline' and 'max_line_length' are mutually exclusive." << std::endl;
145
            return 2;
146
        }
147

148
        if (max_line_length > 255)
149
        {
150
            std::cerr << "Option 'max_line_length' must be less than 256." << std::endl;
151
            return 2;
152
        }
153

154

155
        String query;
156

157
        if (options.count("query"))
158
        {
159
            query = options["query"].as<std::string>();
160
        }
161
        else
162
        {
163
            ReadBufferFromFileDescriptor in(STDIN_FILENO);
164
            readStringUntilEOF(query, in);
165
        }
166

167
        if (obfuscate)
168
        {
169
            WordMap obfuscated_words_map;
170
            WordSet used_nouns;
171
            SipHash hash_func;
172

173
            if (options.count("seed"))
174
            {
175
                hash_func.update(options["seed"].as<std::string>());
176
            }
177

178
            registerInterpreters();
179
            registerFunctions();
180
            registerAggregateFunctions();
181
            registerTableFunctions();
182
            registerDatabases();
183
            registerStorages();
184
            registerFormats();
185

186
            std::unordered_set<std::string> additional_names;
187

188
            auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames();
189
            auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames();
190
            auto all_known_settings = Settings().getAllRegisteredNames();
191
            auto all_known_merge_tree_settings = MergeTreeSettings().getAllRegisteredNames();
192

193
            additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end());
194
            additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end());
195
            additional_names.insert(all_known_settings.begin(), all_known_settings.end());
196
            additional_names.insert(all_known_merge_tree_settings.begin(), all_known_merge_tree_settings.end());
197

198
            for (auto * it = auto_time_zones; *it; ++it)
199
            {
200
                String time_zone_name = *it;
201

202
                /// Example: Europe/Amsterdam
203
                Strings split;
204
                boost::split(split, time_zone_name, [](char c){ return c == '/'; });
205
                for (const auto & word : split)
206
                    if (!word.empty())
207
                        additional_names.insert(word);
208
            }
209

210
            KnownIdentifierFunc is_known_identifier = [&](std::string_view name)
211
            {
212
                std::string what(name);
213

214
                return FunctionFactory::instance().has(what)
215
                    || AggregateFunctionFactory::instance().isAggregateFunctionName(what)
216
                    || TableFunctionFactory::instance().isTableFunctionName(what)
217
                    || FormatFactory::instance().isOutputFormat(what)
218
                    || FormatFactory::instance().isInputFormat(what)
219
                    || additional_names.contains(what);
220
            };
221

222
            WriteBufferFromFileDescriptor out(STDOUT_FILENO);
223
            obfuscateQueries(query, out, obfuscated_words_map, used_nouns, hash_func, is_known_identifier);
224
            out.finalize();
225
        }
226
        else
227
        {
228
            const char * pos = query.data();
229
            const char * end = pos + query.size();
230
            skipSpacesAndComments(pos, end, print_comments);
231

232
            ParserQuery parser(end, allow_settings_after_format_in_insert);
233
            while (pos != end)
234
            {
235
                size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos;
236

237
                ASTPtr res = parseQueryAndMovePosition(
238
                    parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks);
239

240
                std::unique_ptr<ReadBuffer> insert_query_payload;
241
                /// If the query is INSERT ... VALUES, then we will try to parse the data.
242
                if (auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
243
                {
244
                    if ("Values" != insert_query->format)
245
                        throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Can't format INSERT query with data format '{}'", insert_query->format);
246

247
                    /// Reset format to default to have `INSERT INTO table VALUES` instead of `INSERT INTO table VALUES FORMAT Values`
248
                    insert_query->format = {};
249

250
                    /// We assume that data ends with a newline character (same as client does)
251
                    const char * this_query_end = find_first_symbols<'\n'>(insert_query->data, end);
252
                    insert_query->end = this_query_end;
253
                    pos = this_query_end;
254
                    insert_query_payload = getReadBufferFromASTInsertQuery(res);
255
                }
256

257
                if (!quiet)
258
                {
259
                    if (!backslash)
260
                    {
261
                        WriteBufferFromOwnString str_buf;
262
                        formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length);
263

264
                        if (insert_query_payload)
265
                        {
266
                            str_buf.write(' ');
267
                            copyData(*insert_query_payload, str_buf);
268
                        }
269

270
                        String res_string = str_buf.str();
271
                        const char * s_pos = res_string.data();
272
                        const char * s_end = s_pos + res_string.size();
273
                        /// remove trailing spaces
274
                        while (s_end > s_pos && isWhitespaceASCIIOneLine(*(s_end - 1)))
275
                            --s_end;
276
                        WriteBufferFromOStream res_cout(std::cout, 4096);
277
                        /// For multiline queries we print ';' at new line,
278
                        /// but for single line queries we print ';' at the same line
279
                        bool has_multiple_lines = false;
280
                        while (s_pos != s_end)
281
                        {
282
                            if (*s_pos == '\n')
283
                                has_multiple_lines = true;
284
                            res_cout.write(*s_pos++);
285
                        }
286
                        res_cout.finalize();
287

288
                        if (multiple && !insert_query_payload)
289
                        {
290
                            if (oneline || !has_multiple_lines)
291
                                std::cout << ";\n";
292
                            else
293
                                std::cout << "\n;\n";
294
                        }
295
                        else if (multiple && insert_query_payload)
296
                            /// Do not need to add ; because it's already in the insert_query_payload
297
                            std::cout << "\n";
298

299
                        std::cout << std::endl;
300
                    }
301
                    /// add additional '\' at the end of each line;
302
                    else
303
                    {
304
                        WriteBufferFromOwnString str_buf;
305
                        formatAST(*res, str_buf, hilite, oneline);
306

307
                        auto res_string = str_buf.str();
308
                        WriteBufferFromOStream res_cout(std::cout, 4096);
309

310
                        const char * s_pos= res_string.data();
311
                        const char * s_end = s_pos + res_string.size();
312

313
                        while (s_pos != s_end)
314
                        {
315
                            if (*s_pos == '\n')
316
                                res_cout.write(" \\", 2);
317
                            res_cout.write(*s_pos++);
318
                        }
319

320
                        res_cout.finalize();
321
                        if (multiple)
322
                            std::cout << " \\\n;\n";
323
                        std::cout << std::endl;
324
                    }
325
                }
326
                skipSpacesAndComments(pos, end, print_comments);
327
                if (!multiple)
328
                    break;
329
            }
330
        }
331
    }
332
    catch (...)
333
    {
334
        std::cerr << getCurrentExceptionMessage(true) << '\n';
335
        return getCurrentExceptionCode();
336
    }
337
    return 0;
338
}
339

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.