ClickHouse
505 строк · 15.3 Кб
1#include <csignal>
2#include <csetjmp>
3#include <unistd.h>
4#include <fcntl.h>
5
6#include <new>
7#include <iostream>
8#include <vector>
9#include <string>
10#include <tuple>
11#include <string_view>
12#include <utility> /// pair
13
14#include <fmt/format.h>
15
16#include "config_tools.h"
17
18#include <Common/StringUtils/StringUtils.h>
19#include <Common/getHashOfLoadedBinary.h>
20#include <Common/IO.h>
21
22#include <base/phdr_cache.h>
23#include <base/coverage.h>
24
25
/// Universal executable for various clickhouse applications.
/// Every mainEntry* function below is the entry point of one application and takes
/// the same (argc, argv) pair as main(). They are only declared here; no definition
/// of them is visible in this file.
int mainEntryClickHouseServer(int argc, char ** argv);
int mainEntryClickHouseClient(int argc, char ** argv);
int mainEntryClickHouseLocal(int argc, char ** argv);
int mainEntryClickHouseBenchmark(int argc, char ** argv);
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
int mainEntryClickHouseCompressor(int argc, char ** argv);
int mainEntryClickHouseFormat(int argc, char ** argv);
int mainEntryClickHouseObfuscator(int argc, char ** argv);
int mainEntryClickHouseGitImport(int argc, char ** argv);
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
int mainEntryClickHouseSU(int argc, char ** argv);
int mainEntryClickHouseDisks(int argc, char ** argv);
39
40int mainEntryClickHouseHashBinary(int, char **)
41{
42/// Intentionally without newline. So you can run:
43/// objcopy --add-section .clickhouse.hash=<(./clickhouse hash-binary) clickhouse
44std::cout << getHashOfLoadedBinaryHex();
45return 0;
46}
47
/// Keeper-related entry points. Each one is declared only when the corresponding
/// ENABLE_CLICKHOUSE_KEEPER* macro evaluates to non-zero.
#if ENABLE_CLICKHOUSE_KEEPER
int mainEntryClickHouseKeeper(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
57
// install
/// Entry points of the "install", "start", "stop", "status" and "restart" commands.
/// Only declared here; no definition of them is visible in this file.
int mainEntryClickHouseInstall(int argc, char ** argv);
int mainEntryClickHouseStart(int argc, char ** argv);
int mainEntryClickHouseStop(int argc, char ** argv);
int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
64
65namespace
66{
67
/// Pointer to an application entry point; same (argc, argv) shape as main().
using MainFunc = int (*)(int, char**);

/// Add an item here to register new application
/// Maps an application name (as used in "clickhouse <name>", "clickhouse --<name>" or
/// "clickhouse-<name>") to its entry point. main() walks this table in order and picks
/// the first match; printHelp() lists the names in the same order.
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
    {"local", mainEntryClickHouseLocal},
    {"client", mainEntryClickHouseClient},
    {"benchmark", mainEntryClickHouseBenchmark},
    {"server", mainEntryClickHouseServer},
    {"extract-from-config", mainEntryClickHouseExtractFromConfig},
    {"compressor", mainEntryClickHouseCompressor},
    {"format", mainEntryClickHouseFormat},
    {"obfuscator", mainEntryClickHouseObfuscator},
    {"git-import", mainEntryClickHouseGitImport},
    {"static-files-disk-uploader", mainEntryClickHouseStaticFilesDiskUploader},
    {"su", mainEntryClickHouseSU},
    {"hash-binary", mainEntryClickHouseHashBinary},
    {"disks", mainEntryClickHouseDisks},

    // keeper
    /// These entries exist only when the corresponding feature macro is non-zero.
#if ENABLE_CLICKHOUSE_KEEPER
    {"keeper", mainEntryClickHouseKeeper},
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
    {"keeper-converter", mainEntryClickHouseKeeperConverter},
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
    {"keeper-client", mainEntryClickHouseKeeperClient},
#endif

    // install
    {"install", mainEntryClickHouseInstall},
    {"start", mainEntryClickHouseStart},
    {"stop", mainEntryClickHouseStop},
    {"status", mainEntryClickHouseStatus},
    {"restart", mainEntryClickHouseRestart},
};
105
106int printHelp(int, char **)
107{
108std::cerr << "Use one of the following commands:" << std::endl;
109for (auto & application : clickhouse_applications)
110std::cerr << "clickhouse " << application.first << " [args] " << std::endl;
111return -1;
112}
113
/// Add an item here to register a new short name
/// Each pair is {short alias of the binary, application name}. isClickhouseApp() treats
/// a binary invoked as <alias> (or as a path ending in "/<alias>") as that application.
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{
    {"chl", "local"},
    {"chc", "client"},
};
120
121
/// Identifies the instruction set that is being probed by checkRequiredInstructionsImpl().
/// The value is set right before each probe, so when SIGILL arrives it names the
/// unsupported instruction set. NONE means that no probe is pending (all of them passed).
enum class InstructionFail
{
    NONE = 0,
    SSE3 = 1,
    SSSE3 = 2,
    SSE4_1 = 3,
    SSE4_2 = 4,
    POPCNT = 5,
    AVX = 6,
    AVX2 = 7,
    AVX512 = 8
};
134
135auto instructionFailToString(InstructionFail fail)
136{
137switch (fail)
138{
139#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1)
140case InstructionFail::NONE:
141ret("NONE");
142case InstructionFail::SSE3:
143ret("SSE3");
144case InstructionFail::SSSE3:
145ret("SSSE3");
146case InstructionFail::SSE4_1:
147ret("SSE4.1");
148case InstructionFail::SSE4_2:
149ret("SSE4.2");
150case InstructionFail::POPCNT:
151ret("POPCNT");
152case InstructionFail::AVX:
153ret("AVX");
154case InstructionFail::AVX2:
155ret("AVX2");
156case InstructionFail::AVX512:
157ret("AVX512");
158}
159UNREACHABLE();
160}
161
162
/// Jump target shared between checkRequiredInstructions() (which fills it with sigsetjmp)
/// and the SIGILL handler below (which jumps back to it).
sigjmp_buf jmpbuf;

/// SIGILL handler installed for the duration of the instruction check.
/// Never returns to the faulting instruction: jumps back to the sigsetjmp() point,
/// making sigsetjmp() return 1 there.
[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *)
{
    siglongjmp(jmpbuf, 1);
}
169
170/// Check if necessary SSE extensions are available by trying to execute some sse instructions.
171/// If instruction is unavailable, SIGILL will be sent by kernel.
172void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
173{
174#if defined(__SSE3__)
175fail = InstructionFail::SSE3;
176__asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0");
177#endif
178
179#if defined(__SSSE3__)
180fail = InstructionFail::SSSE3;
181__asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0");
182
183#endif
184
185#if defined(__SSE4_1__)
186fail = InstructionFail::SSE4_1;
187__asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0");
188#endif
189
190#if defined(__SSE4_2__)
191fail = InstructionFail::SSE4_2;
192__asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0");
193#endif
194
195/// Defined by -msse4.2
196#if defined(__POPCNT__)
197fail = InstructionFail::POPCNT;
198{
199uint64_t a = 0;
200uint64_t b = 0;
201__asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :);
202}
203#endif
204
205#if defined(__AVX__)
206fail = InstructionFail::AVX;
207__asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0");
208#endif
209
210#if defined(__AVX2__)
211fail = InstructionFail::AVX2;
212__asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0");
213#endif
214
215#if defined(__AVX512__)
216fail = InstructionFail::AVX512;
217__asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0");
218#endif
219
220fail = InstructionFail::NONE;
221}
222
/// Macros to avoid using strlen(), since it may fail if SSE is not supported.
/// Writes `data` to stderr with writeRetry(). `data` has to be a compile-time constant
/// (checked by the static_assert); its length is obtained with sizeof, without the
/// terminating zero. If the write fails, the process is terminated with _Exit(1).
#define writeError(data) do \
    { \
        static_assert(__builtin_constant_p(data)); \
        if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \
            _Exit(1); \
    } while (false)
230
231/// Check SSE and others instructions availability. Calls exit on fail.
232/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
233void checkRequiredInstructions()
234{
235struct sigaction sa{};
236struct sigaction sa_old{};
237sa.sa_sigaction = sigIllCheckHandler;
238sa.sa_flags = SA_SIGINFO;
239auto signal = SIGILL;
240if (sigemptyset(&sa.sa_mask) != 0
241|| sigaddset(&sa.sa_mask, signal) != 0
242|| sigaction(signal, &sa, &sa_old) != 0)
243{
244/// You may wonder about strlen.
245/// Typical implementation of strlen is using SSE4.2 or AVX2.
246/// But this is not the case because it's compiler builtin and is executed at compile time.
247
248writeError("Can not set signal handler\n");
249_Exit(1);
250}
251
252volatile InstructionFail fail = InstructionFail::NONE;
253
254if (sigsetjmp(jmpbuf, 1))
255{
256writeError("Instruction check fail. The CPU does not support ");
257if (!std::apply(writeRetry, instructionFailToString(fail)))
258_Exit(1);
259writeError(" instruction set.\n");
260_Exit(1);
261}
262
263checkRequiredInstructionsImpl(fail);
264
265if (sigaction(signal, &sa_old, nullptr))
266{
267writeError("Can not set signal handler\n");
268_Exit(1);
269}
270}
271
/// Runs checkRequiredInstructions() from the constructor of the namespace-scope object
/// `checker`, i.e. during static initialization, before main() is entered.
struct Checker
{
    Checker()
    {
        checkRequiredInstructions();
    }
} checker
#ifndef OS_DARWIN
    __attribute__((init_priority(101))) /// Run before other static initializers.
#endif
;
283
284
285#if !defined(USE_MUSL)
286/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
287void checkHarmfulEnvironmentVariables(char ** argv)
288{
289std::initializer_list<const char *> harmful_env_variables = {
290/// The list is a selection from "man ld-linux".
291"LD_PRELOAD",
292"LD_LIBRARY_PATH",
293"LD_ORIGIN_PATH",
294"LD_AUDIT",
295"LD_DYNAMIC_WEAK",
296/// The list is a selection from "man dyld" (osx).
297"DYLD_LIBRARY_PATH",
298"DYLD_FALLBACK_LIBRARY_PATH",
299"DYLD_VERSIONED_LIBRARY_PATH",
300"DYLD_INSERT_LIBRARIES",
301};
302
303bool require_reexec = false;
304for (const auto * var : harmful_env_variables)
305{
306if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe)
307{
308/// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful
309if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently
310{
311fmt::print(stderr, "Cannot override {} environment variable", var);
312_exit(1);
313}
314require_reexec = true;
315}
316}
317
318if (require_reexec)
319{
320/// Use execvp() over execv() to search in PATH.
321///
322/// This should be safe, since:
323/// - if argv[0] is relative path - it is OK
324/// - if argv[0] has only basename, the it will search in PATH, like shell will do.
325///
326/// Also note, that this (search in PATH) because there is no easy and
327/// portable way to get absolute path of argv[0].
328/// - on linux there is /proc/self/exec and AT_EXECFN
329/// - but on other OSes there is no such thing (especially on OSX).
330///
331/// And since static linking will be done someday anyway,
332/// let's not pollute the code base with special cases.
333int error = execvp(argv[0], argv);
334_exit(error);
335}
336}
337#endif
338
339
#if defined(SANITIZE_COVERAGE)
/// A user can request to dump the coverage information into files at exit.
/// This is useful for non-server applications such as clickhouse-format or clickhouse-client,
/// that cannot introspect it with SQL functions at runtime.
///
/// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid'
/// containing the list of covered addresses.
///
/// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header.
///
/// Does nothing when the environment variable is not set.
__attribute__((no_sanitize("coverage")))
void dumpCoverage()
{
    const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE"); // NOLINT(concurrency-mt-unsafe)
    if (!coverage_filename_prefix)
        return;

    /// Writes the non-zero elements of `span` into the file `name`; problems are reported on stderr.
    auto dump = [](const std::string & name, auto span)
    {
        /// Write only non-zeros.
        std::vector<uintptr_t> non_zero_addresses;
        non_zero_addresses.reserve(span.size());
        for (auto addr : span)
        {
            if (addr)
                non_zero_addresses.push_back(addr);
        }

        const int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400);
        if (fd == -1)
        {
            writeError("Cannot open a file to write the coverage data\n");
            return;
        }

        const char * bytes = reinterpret_cast<const char *>(non_zero_addresses.data());
        const auto bytes_size = non_zero_addresses.size() * sizeof(non_zero_addresses[0]);
        if (!writeRetry(fd, bytes, bytes_size))
            writeError("Cannot write the coverage data to a file\n");

        if (::close(fd) != 0)
            writeError("Cannot close the file with coverage data\n");
    };

    dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage());
}
#endif
382
383}
384
385bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)
386{
387for (const auto & [alias, name] : clickhouse_short_names)
388if (app_suffix == name
389&& !argv.empty() && (alias == argv[0] || endsWith(argv[0], "/" + std::string(alias))))
390return true;
391
392/// Use app if the first arg 'app' is passed (the arg should be quietly removed)
393if (argv.size() >= 2)
394{
395auto first_arg = argv.begin() + 1;
396
397/// 'clickhouse --client ...' and 'clickhouse client ...' are Ok
398if (*first_arg == app_suffix
399|| (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix))
400{
401argv.erase(first_arg);
402return true;
403}
404}
405
406/// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
407std::string app_name = "clickhouse-" + std::string(app_suffix);
408return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
409}
410
/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure.
/// We don't use it. But it can be used by some libraries for implementation of "plugins".
/// We absolutely discourage the ancient technique of loading
/// 3rd-party uncontrolled dangerous libraries into the process address space,
/// because it is insane.
///
/// The definitions below have C linkage and the names of the dynamic loading functions.
/// They do nothing except report failure (or success for dlclose).

#if !defined(USE_MUSL)
extern "C"
{
/// Always fails: returns a null handle regardless of the arguments.
void * dlopen(const char *, int)
{
    return nullptr;
}

/// Always fails: returns a null handle regardless of the arguments.
void * dlmopen(long, const char *, int) // NOLINT
{
    return nullptr;
}

/// Nothing can have been loaded, so there is nothing to close; reports success.
int dlclose(void *)
{
    return 0;
}

/// Returns a fixed explanation instead of a loader error.
const char * dlerror()
{
    return "ClickHouse does not allow dynamic library loading";
}
}
#endif
441
/// This makes it possible to implement an assert that forbids initialization of a class in static constructors.
/// The flag is true only while main() is running: main() sets it on entry and resets it on exit.
/// Usage:
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
bool inside_main = false;
448
449int main(int argc_, char ** argv_)
450{
451inside_main = true;
452SCOPE_EXIT({ inside_main = false; });
453
454/// PHDR cache is required for query profiler to work reliably
455/// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen)
456/// will work only after additional call of this function.
457/// Note: we forbid dlopen in our code.
458updatePHDRCache();
459
460#if !defined(USE_MUSL)
461checkHarmfulEnvironmentVariables(argv_);
462#endif
463
464/// This is used for testing. For example,
465/// clickhouse-local should be able to run a simple query without throw/catch.
466if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe)
467DB::terminate_on_any_exception = true;
468
469/// Reset new handler to default (that throws std::bad_alloc)
470/// It is needed because LLVM library clobbers it.
471std::set_new_handler(nullptr);
472
473std::vector<char *> argv(argv_, argv_ + argc_);
474
475/// Print a basic help if nothing was matched
476MainFunc main_func = printHelp;
477
478for (auto & application : clickhouse_applications)
479{
480if (isClickhouseApp(application.first, argv))
481{
482main_func = application.second;
483break;
484}
485}
486
487/// Interpret binary without argument or with arguments starts with dash
488/// ('-') as clickhouse-local for better usability:
489///
490/// clickhouse help # dumps help
491/// clickhouse -q 'select 1' # use local
492/// clickhouse # spawn local
493/// clickhouse local # spawn local
494///
495if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-'))
496main_func = mainEntryClickHouseLocal;
497
498int exit_code = main_func(static_cast<int>(argv.size()), argv.data());
499
500#if defined(SANITIZE_COVERAGE)
501dumpCoverage();
502#endif
503
504return exit_code;
505}
506