ClickHouse
338 строк · 13.2 Кб
1#include <functional>2#include <iostream>3#include <string_view>4#include <boost/program_options.hpp>5
6#include <IO/copyData.h>7#include <IO/ReadBufferFromFileDescriptor.h>8#include <IO/ReadHelpers.h>9#include <IO/WriteBufferFromFileDescriptor.h>10#include <IO/WriteBufferFromOStream.h>11#include <Interpreters/registerInterpreters.h>12#include <Parsers/ASTInsertQuery.h>13#include <Parsers/ParserQuery.h>14#include <Parsers/formatAST.h>15#include <Parsers/obfuscateQueries.h>16#include <Parsers/parseQuery.h>17#include <Common/ErrorCodes.h>18#include <Common/StringUtils/StringUtils.h>19#include <Common/TerminalSize.h>20#include <Core/BaseSettingsProgramOptions.h>21
22#include <Interpreters/Context.h>23#include <Functions/FunctionFactory.h>24#include <Databases/registerDatabases.h>25#include <Functions/registerFunctions.h>26#include <AggregateFunctions/AggregateFunctionFactory.h>27#include <AggregateFunctions/registerAggregateFunctions.h>28#include <TableFunctions/TableFunctionFactory.h>29#include <TableFunctions/registerTableFunctions.h>30#include <Storages/StorageFactory.h>31#include <Storages/registerStorages.h>32#include <Storages/MergeTree/MergeTreeSettings.h>33#include <DataTypes/DataTypeFactory.h>34#include <Formats/FormatFactory.h>35#include <Formats/registerFormats.h>36#include <Processors/Transforms/getSourceFromASTInsertQuery.h>37
38
/// Forward declaration of the only error code thrown from this file
/// (used when an INSERT query carries inline data in a format other than Values).
namespace DB
{
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}
}
43
namespace
{

/// Advances `pos` past any leading run of whitespace and `--` single-line comments.
///
/// @param pos            Current position in the query text; updated in place. On return it points
///                       either at `end` or at the first byte that is neither whitespace nor the
///                       start of a `--` comment.
/// @param end            One past the last byte of the query text.
/// @param print_comments When true, every skipped comment (without its line terminator) is echoed
///                       to stdout followed by a newline.
void skipSpacesAndComments(const char *& pos, const char * end, bool print_comments)
{
    do
    {
        /// Skip spaces so that whitespace after the last query does not reach the parser.
        /// The cast is required: passing a negative `char` (e.g. a UTF-8 byte) to std::isspace is undefined behaviour.
        while (pos != end && std::isspace(static_cast<unsigned char>(*pos)))
            ++pos;

        const char * comment_begin = pos;

        /// Skip a comment after the last query so that the parser is not invoked on it.
        /// `>= 2` (not `> 2`): a bare "--" that ends the input is still a comment, just an empty one.
        if (end - pos >= 2 && pos[0] == '-' && pos[1] == '-')
        {
            pos += 2;

            /// The comment lasts until the end of the line; the newline itself is consumed
            /// as whitespace on the next iteration.
            while (pos != end && *pos != '\n')
                ++pos;

            if (print_comments)
                std::cout << std::string_view(comment_begin, static_cast<size_t>(pos - comment_begin)) << "\n";
        }
        else
        {
            /// Something other than whitespace or a comment: the next query starts here.
            break;
        }
    } while (pos != end);
}

}
73
/// Diagnostics silenced for the remainder of this translation unit.
/// NOTE(review): presumably needed because the entry point below has no prior declaration
/// in this file — confirm before removing.
#pragma clang diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"

/// Array of time zone names defined in another translation unit.
/// It is iterated below until a null entry is reached.
extern const char * auto_time_zones[];
79int mainEntryClickHouseFormat(int argc, char ** argv)80{
81using namespace DB;82
83try84{85boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());86desc.add_options()87("query", po::value<std::string>(), "query to format")88("help,h", "produce help message")89("comments", "keep comments in the output")90("hilite", "add syntax highlight with ANSI terminal escape sequences")91("oneline", "format in single line")92("max_line_length", po::value<size_t>()->default_value(0), "format in single line queries with length less than specified")93("quiet,q", "just check syntax, no output on success")94("multiquery,n", "allow multiple queries in the same file")95("obfuscate", "obfuscate instead of formatting")96("backslash", "add a backslash at the end of each line of the formatted query")97("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")98("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")99;100
101Settings cmd_settings;102for (const auto & field : cmd_settings.all())103{104std::string_view name = field.getName();105if (name == "max_parser_depth" || name == "max_query_size")106addProgramOption(cmd_settings, desc, name, field);107}108
109boost::program_options::variables_map options;110boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);111po::notify(options);112
113if (options.count("help"))114{115std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;116std::cout << desc << std::endl;117return 1;118}119
120bool hilite = options.count("hilite");121bool oneline = options.count("oneline");122bool quiet = options.count("quiet");123bool multiple = options.count("multiquery");124bool print_comments = options.count("comments");125size_t max_line_length = options["max_line_length"].as<size_t>();126bool obfuscate = options.count("obfuscate");127bool backslash = options.count("backslash");128bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert");129
130if (quiet && (hilite || oneline || obfuscate))131{132std::cerr << "Options 'hilite' or 'oneline' or 'obfuscate' have no sense in 'quiet' mode." << std::endl;133return 2;134}135
136if (obfuscate && (hilite || oneline || quiet))137{138std::cerr << "Options 'hilite' or 'oneline' or 'quiet' have no sense in 'obfuscate' mode." << std::endl;139return 2;140}141
142if (oneline && max_line_length)143{144std::cerr << "Options 'oneline' and 'max_line_length' are mutually exclusive." << std::endl;145return 2;146}147
148if (max_line_length > 255)149{150std::cerr << "Option 'max_line_length' must be less than 256." << std::endl;151return 2;152}153
154
155String query;156
157if (options.count("query"))158{159query = options["query"].as<std::string>();160}161else162{163ReadBufferFromFileDescriptor in(STDIN_FILENO);164readStringUntilEOF(query, in);165}166
167if (obfuscate)168{169WordMap obfuscated_words_map;170WordSet used_nouns;171SipHash hash_func;172
173if (options.count("seed"))174{175hash_func.update(options["seed"].as<std::string>());176}177
178registerInterpreters();179registerFunctions();180registerAggregateFunctions();181registerTableFunctions();182registerDatabases();183registerStorages();184registerFormats();185
186std::unordered_set<std::string> additional_names;187
188auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames();189auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames();190auto all_known_settings = Settings().getAllRegisteredNames();191auto all_known_merge_tree_settings = MergeTreeSettings().getAllRegisteredNames();192
193additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end());194additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end());195additional_names.insert(all_known_settings.begin(), all_known_settings.end());196additional_names.insert(all_known_merge_tree_settings.begin(), all_known_merge_tree_settings.end());197
198for (auto * it = auto_time_zones; *it; ++it)199{200String time_zone_name = *it;201
202/// Example: Europe/Amsterdam203Strings split;204boost::split(split, time_zone_name, [](char c){ return c == '/'; });205for (const auto & word : split)206if (!word.empty())207additional_names.insert(word);208}209
210KnownIdentifierFunc is_known_identifier = [&](std::string_view name)211{212std::string what(name);213
214return FunctionFactory::instance().has(what)215|| AggregateFunctionFactory::instance().isAggregateFunctionName(what)216|| TableFunctionFactory::instance().isTableFunctionName(what)217|| FormatFactory::instance().isOutputFormat(what)218|| FormatFactory::instance().isInputFormat(what)219|| additional_names.contains(what);220};221
222WriteBufferFromFileDescriptor out(STDOUT_FILENO);223obfuscateQueries(query, out, obfuscated_words_map, used_nouns, hash_func, is_known_identifier);224out.finalize();225}226else227{228const char * pos = query.data();229const char * end = pos + query.size();230skipSpacesAndComments(pos, end, print_comments);231
232ParserQuery parser(end, allow_settings_after_format_in_insert);233while (pos != end)234{235size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos;236
237ASTPtr res = parseQueryAndMovePosition(238parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks);239
240std::unique_ptr<ReadBuffer> insert_query_payload;241/// If the query is INSERT ... VALUES, then we will try to parse the data.242if (auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)243{244if ("Values" != insert_query->format)245throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Can't format INSERT query with data format '{}'", insert_query->format);246
247/// Reset format to default to have `INSERT INTO table VALUES` instead of `INSERT INTO table VALUES FORMAT Values`248insert_query->format = {};249
250/// We assume that data ends with a newline character (same as client does)251const char * this_query_end = find_first_symbols<'\n'>(insert_query->data, end);252insert_query->end = this_query_end;253pos = this_query_end;254insert_query_payload = getReadBufferFromASTInsertQuery(res);255}256
257if (!quiet)258{259if (!backslash)260{261WriteBufferFromOwnString str_buf;262formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length);263
264if (insert_query_payload)265{266str_buf.write(' ');267copyData(*insert_query_payload, str_buf);268}269
270String res_string = str_buf.str();271const char * s_pos = res_string.data();272const char * s_end = s_pos + res_string.size();273/// remove trailing spaces274while (s_end > s_pos && isWhitespaceASCIIOneLine(*(s_end - 1)))275--s_end;276WriteBufferFromOStream res_cout(std::cout, 4096);277/// For multiline queries we print ';' at new line,278/// but for single line queries we print ';' at the same line279bool has_multiple_lines = false;280while (s_pos != s_end)281{282if (*s_pos == '\n')283has_multiple_lines = true;284res_cout.write(*s_pos++);285}286res_cout.finalize();287
288if (multiple && !insert_query_payload)289{290if (oneline || !has_multiple_lines)291std::cout << ";\n";292else293std::cout << "\n;\n";294}295else if (multiple && insert_query_payload)296/// Do not need to add ; because it's already in the insert_query_payload297std::cout << "\n";298
299std::cout << std::endl;300}301/// add additional '\' at the end of each line;302else303{304WriteBufferFromOwnString str_buf;305formatAST(*res, str_buf, hilite, oneline);306
307auto res_string = str_buf.str();308WriteBufferFromOStream res_cout(std::cout, 4096);309
310const char * s_pos= res_string.data();311const char * s_end = s_pos + res_string.size();312
313while (s_pos != s_end)314{315if (*s_pos == '\n')316res_cout.write(" \\", 2);317res_cout.write(*s_pos++);318}319
320res_cout.finalize();321if (multiple)322std::cout << " \\\n;\n";323std::cout << std::endl;324}325}326skipSpacesAndComments(pos, end, print_comments);327if (!multiple)328break;329}330}331}332catch (...)333{334std::cerr << getCurrentExceptionMessage(true) << '\n';335return getCurrentExceptionCode();336}337return 0;338}
339