ClickHouse

Форк
0
/
main.cpp 
505 строк · 15.3 Кб
1
#include <csignal>
2
#include <csetjmp>
3
#include <unistd.h>
4
#include <fcntl.h>
5

6
#include <new>
7
#include <iostream>
8
#include <vector>
9
#include <string>
10
#include <tuple>
11
#include <string_view>
12
#include <utility> /// pair
13

14
#include <fmt/format.h>
15

16
#include "config_tools.h"
17

18
#include <Common/StringUtils/StringUtils.h>
19
#include <Common/getHashOfLoadedBinary.h>
20
#include <Common/IO.h>
21

22
#include <base/phdr_cache.h>
23
#include <base/coverage.h>
24

25

26
/// Universal executable for various clickhouse applications.
/// Entry points of the individual applications. They are only declared here;
/// main() picks one of them by name and calls it with the command line arguments.
int mainEntryClickHouseServer(int argc, char ** argv);
int mainEntryClickHouseClient(int argc, char ** argv);
int mainEntryClickHouseLocal(int argc, char ** argv);
int mainEntryClickHouseBenchmark(int argc, char ** argv);
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
int mainEntryClickHouseCompressor(int argc, char ** argv);
int mainEntryClickHouseFormat(int argc, char ** argv);
int mainEntryClickHouseObfuscator(int argc, char ** argv);
int mainEntryClickHouseGitImport(int argc, char ** argv);
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
int mainEntryClickHouseSU(int argc, char ** argv);
int mainEntryClickHouseDisks(int argc, char ** argv);
39

40
int mainEntryClickHouseHashBinary(int, char **)
41
{
42
    /// Intentionally without newline. So you can run:
43
    /// objcopy --add-section .clickhouse.hash=<(./clickhouse hash-binary) clickhouse
44
    std::cout << getHashOfLoadedBinaryHex();
45
    return 0;
46
}
47

48
/// Keeper-related entry points. Each one is declared only when the corresponding
/// ENABLE_CLICKHOUSE_* macro evaluates to non-zero.
#if ENABLE_CLICKHOUSE_KEEPER
int mainEntryClickHouseKeeper(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
57

58
// install
/// Entry points of the install/start/stop/status/restart commands (declared only; defined outside this file).
int mainEntryClickHouseInstall(int argc, char ** argv);
int mainEntryClickHouseStart(int argc, char ** argv);
int mainEntryClickHouseStop(int argc, char ** argv);
int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
64

65
namespace
66
{
67

68
using MainFunc = int (*)(int, char**);
69

70
/// Add an item here to register new application
71
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
72
{
73
    {"local", mainEntryClickHouseLocal},
74
    {"client", mainEntryClickHouseClient},
75
    {"benchmark", mainEntryClickHouseBenchmark},
76
    {"server", mainEntryClickHouseServer},
77
    {"extract-from-config", mainEntryClickHouseExtractFromConfig},
78
    {"compressor", mainEntryClickHouseCompressor},
79
    {"format", mainEntryClickHouseFormat},
80
    {"obfuscator", mainEntryClickHouseObfuscator},
81
    {"git-import", mainEntryClickHouseGitImport},
82
    {"static-files-disk-uploader", mainEntryClickHouseStaticFilesDiskUploader},
83
    {"su", mainEntryClickHouseSU},
84
    {"hash-binary", mainEntryClickHouseHashBinary},
85
    {"disks", mainEntryClickHouseDisks},
86

87
    // keeper
88
#if ENABLE_CLICKHOUSE_KEEPER
89
    {"keeper", mainEntryClickHouseKeeper},
90
#endif
91
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
92
    {"keeper-converter", mainEntryClickHouseKeeperConverter},
93
#endif
94
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
95
    {"keeper-client", mainEntryClickHouseKeeperClient},
96
#endif
97

98
    // install
99
    {"install", mainEntryClickHouseInstall},
100
    {"start", mainEntryClickHouseStart},
101
    {"stop", mainEntryClickHouseStop},
102
    {"status", mainEntryClickHouseStatus},
103
    {"restart", mainEntryClickHouseRestart},
104
};
105

106
int printHelp(int, char **)
107
{
108
    std::cerr << "Use one of the following commands:" << std::endl;
109
    for (auto & application : clickhouse_applications)
110
        std::cerr << "clickhouse " << application.first << " [args] " << std::endl;
111
    return -1;
112
}
113

114
/// Add an item here to register a new short name.
/// Each item is {short program name, application name}: when the program is run under
/// the short name, isClickhouseApp() treats it as the corresponding application.
std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{
    {"chl", "local"},
    {"chc", "client"},
};
120

121

122
/// Identifies the instruction set whose probe was being executed by checkRequiredInstructionsImpl().
/// NONE means that no probe is pending, i.e. all probes have passed.
enum class InstructionFail
{
    NONE = 0,
    SSE3 = 1,
    SSSE3 = 2,
    SSE4_1 = 3,
    SSE4_2 = 4,
    POPCNT = 5,
    AVX = 6,
    AVX2 = 7,
    AVX512 = 8
};
134

135
auto instructionFailToString(InstructionFail fail)
136
{
137
    switch (fail)
138
    {
139
#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1)
140
        case InstructionFail::NONE:
141
            ret("NONE");
142
        case InstructionFail::SSE3:
143
            ret("SSE3");
144
        case InstructionFail::SSSE3:
145
            ret("SSSE3");
146
        case InstructionFail::SSE4_1:
147
            ret("SSE4.1");
148
        case InstructionFail::SSE4_2:
149
            ret("SSE4.2");
150
        case InstructionFail::POPCNT:
151
            ret("POPCNT");
152
        case InstructionFail::AVX:
153
            ret("AVX");
154
        case InstructionFail::AVX2:
155
            ret("AVX2");
156
        case InstructionFail::AVX512:
157
            ret("AVX512");
158
    }
159
    UNREACHABLE();
160
}
161

162

163
/// Jump target for sigIllCheckHandler(). It is filled by sigsetjmp() in checkRequiredInstructions().
sigjmp_buf jmpbuf;

/// Signal handler installed for SIGILL while the instruction probes run.
/// It never returns normally: control is transferred back to the sigsetjmp() point,
/// where sigsetjmp() then returns 1. All arguments are ignored.
[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *)
{
    siglongjmp(jmpbuf, 1);
}
169

170
/// Check if necessary SSE extensions are available by trying to execute some sse instructions.
171
/// If instruction is unavailable, SIGILL will be sent by kernel.
172
void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
173
{
174
#if defined(__SSE3__)
175
    fail = InstructionFail::SSE3;
176
    __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0");
177
#endif
178

179
#if defined(__SSSE3__)
180
    fail = InstructionFail::SSSE3;
181
    __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0");
182

183
#endif
184

185
#if defined(__SSE4_1__)
186
    fail = InstructionFail::SSE4_1;
187
    __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0");
188
#endif
189

190
#if defined(__SSE4_2__)
191
    fail = InstructionFail::SSE4_2;
192
    __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0");
193
#endif
194

195
    /// Defined by -msse4.2
196
#if defined(__POPCNT__)
197
    fail = InstructionFail::POPCNT;
198
    {
199
        uint64_t a = 0;
200
        uint64_t b = 0;
201
        __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :);
202
    }
203
#endif
204

205
#if defined(__AVX__)
206
    fail = InstructionFail::AVX;
207
    __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0");
208
#endif
209

210
#if defined(__AVX2__)
211
    fail = InstructionFail::AVX2;
212
    __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0");
213
#endif
214

215
#if defined(__AVX512__)
216
    fail = InstructionFail::AVX512;
217
    __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0");
218
#endif
219

220
    fail = InstructionFail::NONE;
221
}
222

223
/// Macros to avoid using strlen(), since it may fail if SSE is not supported.
/// Writes the string literal `data` (without the terminating zero) to stderr with writeRetry();
/// the length is taken with sizeof. If the write fails, the process is terminated with _Exit(1).
/// The static_assert rejects arguments that are not compile-time constants.
#define writeError(data) do \
    { \
        static_assert(__builtin_constant_p(data)); \
        if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \
            _Exit(1); \
    } while (false)
230

231
/// Check SSE and others instructions availability. Calls exit on fail.
232
/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
233
void checkRequiredInstructions()
234
{
235
    struct sigaction sa{};
236
    struct sigaction sa_old{};
237
    sa.sa_sigaction = sigIllCheckHandler;
238
    sa.sa_flags = SA_SIGINFO;
239
    auto signal = SIGILL;
240
    if (sigemptyset(&sa.sa_mask) != 0
241
        || sigaddset(&sa.sa_mask, signal) != 0
242
        || sigaction(signal, &sa, &sa_old) != 0)
243
    {
244
        /// You may wonder about strlen.
245
        /// Typical implementation of strlen is using SSE4.2 or AVX2.
246
        /// But this is not the case because it's compiler builtin and is executed at compile time.
247

248
        writeError("Can not set signal handler\n");
249
        _Exit(1);
250
    }
251

252
    volatile InstructionFail fail = InstructionFail::NONE;
253

254
    if (sigsetjmp(jmpbuf, 1))
255
    {
256
        writeError("Instruction check fail. The CPU does not support ");
257
        if (!std::apply(writeRetry, instructionFailToString(fail)))
258
            _Exit(1);
259
        writeError(" instruction set.\n");
260
        _Exit(1);
261
    }
262

263
    checkRequiredInstructionsImpl(fail);
264

265
    if (sigaction(signal, &sa_old, nullptr))
266
    {
267
        writeError("Can not set signal handler\n");
268
        _Exit(1);
269
    }
270
}
271

272
struct Checker
273
{
274
    Checker()
275
    {
276
        checkRequiredInstructions();
277
    }
278
} checker
279
#ifndef OS_DARWIN
280
    __attribute__((init_priority(101)))    /// Run before other static initializers.
281
#endif
282
;
283

284

285
#if !defined(USE_MUSL)
286
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
287
void checkHarmfulEnvironmentVariables(char ** argv)
288
{
289
    std::initializer_list<const char *> harmful_env_variables = {
290
        /// The list is a selection from "man ld-linux".
291
        "LD_PRELOAD",
292
        "LD_LIBRARY_PATH",
293
        "LD_ORIGIN_PATH",
294
        "LD_AUDIT",
295
        "LD_DYNAMIC_WEAK",
296
        /// The list is a selection from "man dyld" (osx).
297
        "DYLD_LIBRARY_PATH",
298
        "DYLD_FALLBACK_LIBRARY_PATH",
299
        "DYLD_VERSIONED_LIBRARY_PATH",
300
        "DYLD_INSERT_LIBRARIES",
301
    };
302

303
    bool require_reexec = false;
304
    for (const auto * var : harmful_env_variables)
305
    {
306
        if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe)
307
        {
308
            /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful
309
            if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently
310
            {
311
                fmt::print(stderr, "Cannot override {} environment variable", var);
312
                _exit(1);
313
            }
314
            require_reexec = true;
315
        }
316
    }
317

318
    if (require_reexec)
319
    {
320
        /// Use execvp() over execv() to search in PATH.
321
        ///
322
        /// This should be safe, since:
323
        /// - if argv[0] is relative path - it is OK
324
        /// - if argv[0] has only basename, the it will search in PATH, like shell will do.
325
        ///
326
        /// Also note, that this (search in PATH) because there is no easy and
327
        /// portable way to get absolute path of argv[0].
328
        /// - on linux there is /proc/self/exec and AT_EXECFN
329
        /// - but on other OSes there is no such thing (especially on OSX).
330
        ///
331
        /// And since static linking will be done someday anyway,
332
        /// let's not pollute the code base with special cases.
333
        int error = execvp(argv[0], argv);
334
        _exit(error);
335
    }
336
}
337
#endif
338

339

340
#if defined(SANITIZE_COVERAGE)
/// Writes the non-zero entries of getCumulativeCoverage() to a file if the
/// CLICKHOUSE_WRITE_COVERAGE environment variable is set; does nothing otherwise.
__attribute__((no_sanitize("coverage")))
void dumpCoverage()
{
    /// A user can request to dump the coverage information into files at exit.
    /// This is useful for non-server applications such as clickhouse-format or clickhouse-client,
    /// that cannot introspect it with SQL functions at runtime.

    /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid'
    /// containing the list of covered addresses.

    /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header.

    if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe)
    {
        auto dump = [](const std::string & name, auto span)
        {
            /// Write only non-zeros.
            std::vector<uintptr_t> data;
            data.reserve(span.size());
            for (auto addr : span)
                if (addr)
                    data.push_back(addr);

            /// The file is created readable by the owner only.
            int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400);
            if (-1 == fd)
            {
                writeError("Cannot open a file to write the coverage data\n");
            }
            else
            {
                if (!writeRetry(fd, reinterpret_cast<const char *>(data.data()), data.size() * sizeof(data[0])))
                    writeError("Cannot write the coverage data to a file\n");
                if (0 != ::close(fd))
                    writeError("Cannot close the file with coverage data\n");
            }
        };

        dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage());
    }
}
#endif
382

383
}
384

385
bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)
386
{
387
    for (const auto & [alias, name] : clickhouse_short_names)
388
        if (app_suffix == name
389
            && !argv.empty() && (alias == argv[0] || endsWith(argv[0], "/" + std::string(alias))))
390
            return true;
391

392
    /// Use app if the first arg 'app' is passed (the arg should be quietly removed)
393
    if (argv.size() >= 2)
394
    {
395
        auto first_arg = argv.begin() + 1;
396

397
        /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok
398
        if (*first_arg == app_suffix
399
            || (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix))
400
        {
401
            argv.erase(first_arg);
402
            return true;
403
        }
404
    }
405

406
    /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app
407
    std::string app_name = "clickhouse-" + std::string(app_suffix);
408
    return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
409
}
410

411
/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure.
/// We don't use it. But it can be used by some libraries for implementation of "plugins".
/// We absolutely discourage the ancient technique of loading
/// 3rd-party uncontrolled dangerous libraries into the process address space,
/// because it is insane.
///
/// The definitions below do nothing: the "open" functions always return nullptr,
/// dlclose always returns 0 and dlerror always returns the same fixed message.

#if !defined(USE_MUSL)
extern "C"
{
    void * dlopen(const char *, int)
    {
        return nullptr;
    }

    void * dlmopen(long, const char *, int) // NOLINT
    {
        return nullptr;
    }

    int dlclose(void *)
    {
        return 0;
    }

    const char * dlerror()
    {
        return "ClickHouse does not allow dynamic library loading";
    }
}
#endif
441

442
/// This allows to implement assert to forbid initialization of a class in static constructors.
/// Usage:
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
///
/// The flag is false initially; main() sets it to true on entry and back to false on exit.
bool inside_main = false;
448

449
int main(int argc_, char ** argv_)
450
{
451
    inside_main = true;
452
    SCOPE_EXIT({ inside_main = false; });
453

454
    /// PHDR cache is required for query profiler to work reliably
455
    /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen)
456
    ///  will work only after additional call of this function.
457
    /// Note: we forbid dlopen in our code.
458
    updatePHDRCache();
459

460
#if !defined(USE_MUSL)
461
    checkHarmfulEnvironmentVariables(argv_);
462
#endif
463

464
    /// This is used for testing. For example,
465
    /// clickhouse-local should be able to run a simple query without throw/catch.
466
    if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe)
467
        DB::terminate_on_any_exception = true;
468

469
    /// Reset new handler to default (that throws std::bad_alloc)
470
    /// It is needed because LLVM library clobbers it.
471
    std::set_new_handler(nullptr);
472

473
    std::vector<char *> argv(argv_, argv_ + argc_);
474

475
    /// Print a basic help if nothing was matched
476
    MainFunc main_func = printHelp;
477

478
    for (auto & application : clickhouse_applications)
479
    {
480
        if (isClickhouseApp(application.first, argv))
481
        {
482
            main_func = application.second;
483
            break;
484
        }
485
    }
486

487
    /// Interpret binary without argument or with arguments starts with dash
488
    /// ('-') as clickhouse-local for better usability:
489
    ///
490
    ///     clickhouse help # dumps help
491
    ///     clickhouse -q 'select 1' # use local
492
    ///     clickhouse # spawn local
493
    ///     clickhouse local # spawn local
494
    ///
495
    if (main_func == printHelp && !argv.empty() && (argv.size() == 1 || argv[1][0] == '-'))
496
        main_func = mainEntryClickHouseLocal;
497

498
    int exit_code = main_func(static_cast<int>(argv.size()), argv.data());
499

500
#if defined(SANITIZE_COVERAGE)
501
    dumpCoverage();
502
#endif
503

504
    return exit_code;
505
}
506

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.