29
#include "qemu/osdep.h"
32
#include <glib/gprintf.h>
34
#include "sysemu/sysemu.h"
36
#include "qapi/error.h"
37
#include "qemu/error-report.h"
38
#include "qemu/madvise.h"
39
#include "qemu/sockets.h"
40
#include "qemu/thread.h"
42
#include "qemu/cutils.h"
43
#include "qemu/units.h"
44
#include "qemu/thread-context.h"
45
#include "qemu/main-loop.h"
48
#include <sys/syscall.h>
61
#include "qemu/mmap-alloc.h"
/*
 * Hard ceiling on the number of preallocation worker threads; it is one of
 * the bounds applied with MIN() when the thread count is chosen.
 */
63
#define MAX_MEM_PREALLOC_THREAD_COUNT 16
/*
 * File-scope list of MemsetContext entries, set up with
 * QLIST_HEAD_INITIALIZER. touch_all_pages() inserts contexts at the head and
 * qemu_finish_async_prealloc_mem() removes each one and waits for it.
 */
67
static QLIST_HEAD(, MemsetContext) memset_contexts =
68
QLIST_HEAD_INITIALIZER(memset_contexts);
/*
 * Per-run preallocation state and the per-thread record that points back
 * to it.
 * NOTE(review): several members and the closing braces are not visible in
 * this listing; the comments below cover only the members shown.
 */
70
typedef struct MemsetContext {
/* Worker threads wait on page_cond until this becomes true. */
71
bool all_threads_created;
/* NOTE(review): no reader or writer of this flag is visible here. */
72
bool any_thread_failed;
/* Array with one MemsetThread per worker (allocated with g_new0()). */
73
struct MemsetThread *threads;
/* Linkage for the memset_contexts list. */
75
QLIST_ENTRY(MemsetContext) next;
/*
 * Back-pointer to the owning context; presumably a member of struct
 * MemsetThread, since workers read it as memset_args->context.
 */
84
MemsetContext *context;
86
typedef struct MemsetThread MemsetThread;
/* Context whose threads the SIGBUS handler inspects; NULL when none. */
89
static MemsetContext *sigbus_memset_context;
/*
 * Previous SIGBUS disposition, filled in by sigaction() when the handler is
 * installed and passed back to sigaction() to restore it afterwards.
 * NOTE(review): unlike its neighbours this is not declared static; confirm
 * that external linkage is intended.
 */
90
struct sigaction sigbus_oldact;
/* Taken before the SIGBUS handler is installed, released after restore. */
91
static QemuMutex sigbus_mutex;
/* page_mutex and page_cond protect and signal all_threads_created. */
93
static QemuMutex page_mutex;
94
static QemuCond page_cond;
/*
 * Return an identifier for the calling thread.
 *
 * One branch issues the gettid system call; FreeBSD, NetBSD and OpenBSD each
 * have their own preprocessor branch.
 * NOTE(review): the opening conditional and the bodies of the BSD branches
 * are not visible in this listing.
 */
96
int qemu_get_thread_id(void)
99
return syscall(SYS_gettid);
100
#elif defined(__FreeBSD__)
105
#elif defined(__NetBSD__)
107
#elif defined(__OpenBSD__)
/*
 * Thin wrapper around daemon(): forwards @nochdir and @noclose unchanged and
 * returns whatever daemon() returns.
 */
114
int qemu_daemon(int nochdir, int noclose)
116
return daemon(nochdir, noclose);
/*
 * Write the current process id, followed by a newline, to the file @path.
 *
 * Visible steps: open @path write-only with owner read/write permission,
 * fstat() the descriptor, take an fcntl() F_SETLK lock, stat() @path and
 * compare inode numbers, truncate the file, then write the pid string.
 * Failures are reported through @errp.
 *
 * NOTE(review): the return statements, cleanup paths and any retry loop are
 * not visible in this listing, so the exact meaning of the bool result
 * (presumably true on success) should be confirmed.
 */
119
bool qemu_write_pidfile(const char *path, Error **errp)
126
struct flock lock = {
128
.l_whence = SEEK_SET,
132
fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp);
/* 'b' describes the file that was actually opened. */
137
if (fstat(fd, &b) < 0) {
138
error_setg_errno(errp, errno, "Cannot stat file");
142
if (fcntl(fd, F_SETLK, &lock)) {
143
error_setg_errno(errp, errno, "Cannot lock pid file");
/* 'a' describes whatever @path names now, after the lock was taken. */
151
if (stat(path, &a) < 0) {
/*
 * Same inode: presumably this means the locked file is still the one @path
 * refers to (i.e. it was not replaced meanwhile) -- confirm.
 */
160
if (a.st_ino == b.st_ino) {
171
if (ftruncate(fd, 0) < 0) {
172
error_setg_errno(errp, errno, "Failed to truncate pid file");
176
snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
/* A short write is treated as failure. */
177
if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
178
error_setg(errp, "Failed to write pid file");
/*
 * Map @size bytes of anonymous memory via qemu_ram_mmap() with no backing
 * file descriptor (-1), using QEMU_VMALLOC_ALIGN as the alignment.
 *
 * @shared adds QEMU_MAP_SHARED to the map flags; 'noreserve' adds
 * QEMU_MAP_NORESERVE. The resulting pointer is compared against MAP_FAILED
 * and then traced.
 *
 * NOTE(review): the rest of the parameter list, the failure branch, the use
 * of @alignment and the return statement are not visible in this listing.
 */
192
void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
195
const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
196
(noreserve ? QEMU_MAP_NORESERVE : 0);
197
size_t align = QEMU_VMALLOC_ALIGN;
198
void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
200
if (ptr == MAP_FAILED) {
208
trace_qemu_anon_ram_alloc(size, ptr);
/*
 * Release a mapping of @size bytes at @ptr: emit the trace event, then unmap
 * it with qemu_ram_munmap() using -1 as the file descriptor.
 */
212
void qemu_anon_ram_free(void *ptr, size_t size)
214
trace_qemu_anon_ram_free(ptr, size);
215
qemu_ram_munmap(-1, ptr, size);
/*
 * Switch @fd to blocking mode by asking g_unix_set_fd_nonblocking() to
 * disable non-blocking. The call's result is discarded and no GError is
 * requested, so failures go unreported here.
 */
218
void qemu_socket_set_block(int fd)
220
g_unix_set_fd_nonblocking(fd, false, NULL);
/*
 * Try to switch @fd to non-blocking mode.
 *
 * Returns 0 when g_unix_set_fd_nonblocking() succeeds, otherwise the negated
 * errno value.
 */
223
int qemu_socket_try_set_nonblock(int fd)
225
return g_unix_set_fd_nonblocking(fd, true, NULL) ? 0 : -errno;
/*
 * Switch @fd to non-blocking mode via qemu_socket_try_set_nonblock().
 * NOTE(review): what is done with the result stored in 'f' is not visible in
 * this listing.
 */
228
void qemu_socket_set_nonblock(int fd)
231
f = qemu_socket_try_set_nonblock(fd);
/*
 * Set the SO_REUSEADDR socket option on @fd from the local 'val'.
 * NOTE(review): the value assigned to 'val' and the handling of 'ret' are
 * not visible in this listing.
 */
235
int socket_set_fast_reuse(int fd)
239
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
240
(const char *)&val, sizeof(val));
/*
 * Mark @fd close-on-exec: read the current descriptor flags with F_GETFD and
 * write them back with FD_CLOEXEC added.
 * NOTE(review): any checks on the fcntl() results are not visible in this
 * listing.
 */
247
void qemu_set_cloexec(int fd)
250
f = fcntl(fd, F_GETFD);
252
f = fcntl(fd, F_SETFD, f | FD_CLOEXEC);
/*
 * Create a socket pair in @sv whose descriptors are close-on-exec.
 *
 * The first attempt passes SOCK_CLOEXEC in the type. A second attempt uses
 * plain socketpair() and then sets close-on-exec on both descriptors with
 * qemu_set_cloexec().
 *
 * NOTE(review): the bodies of the conditionals, any preprocessor guard
 * around the SOCK_CLOEXEC attempt and the return statements are not visible
 * in this listing.
 */
256
int qemu_socketpair(int domain, int type, int protocol, int sv[2])
261
ret = socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
/*
 * True for success and for every failure other than EINVAL; presumably
 * only an EINVAL failure reaches the second attempt below.
 */
262
if (ret != -1 || errno != EINVAL) {
266
ret = socketpair(domain, type, protocol, sv);
268
qemu_set_cloexec(sv[0]);
269
qemu_set_cloexec(sv[1]);
/*
 * Return CONFIG_QEMU_LOCALSTATEDIR as resolved by get_relocated_path().
 * NOTE(review): the return type is declared on a line not visible in this
 * listing; ownership of the returned value should be confirmed there.
 */
276
qemu_get_local_state_dir(void)
278
return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
/*
 * Turn the ECHO, ECHONL, ICANON and IEXTEN local-mode flags of terminal @fd
 * on or off, then apply the settings immediately with tcsetattr(TCSANOW).
 * NOTE(review): the condition choosing between the two assignments is not
 * visible here; presumably the flags are set when @echo is true and cleared
 * otherwise.
 */
281
void qemu_set_tty_echo(int fd, bool echo)
288
tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
290
tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
293
tcsetattr(fd, TCSANOW, &tty);
/*
 * SIGBUS handler used while memory is preallocated by touching pages.
 *
 * If the signal arrives on one of the threads recorded in
 * sigbus_memset_context, control jumps back to that thread's sigsetjmp()
 * point. The visible remainder forwards to the previously installed
 * SA_SIGINFO handler and emits a warning.
 *
 * NOTE(review): two alternative signatures are shown; presumably a
 * preprocessor conditional that is not visible selects one of them. The
 * control flow between the forwarding call and the warning is also not
 * visible.
 */
297
static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
299
static void sigbus_handler(int signal)
304
if (sigbus_memset_context) {
305
for (i = 0; i < sigbus_memset_context->num_threads; i++) {
306
MemsetThread *thread = &sigbus_memset_context->threads[i];
/* Only the thread that took the signal may jump to its own env. */
308
if (qemu_thread_is_self(&thread->pgthread)) {
309
siglongjmp(thread->env, 1);
327
if (sigbus_oldact.sa_flags & SA_SIGINFO) {
328
sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
332
warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
/*
 * Thread entry point that touches every page of its assigned range.
 *
 * @arg is the MemsetThread describing the range (start address, page count,
 * page size). The result is 'ret' packed into the returned pointer.
 *
 * NOTE(review): the assignments to 'ret', the initialisation of 'set' and
 * the stepping of 'addr' inside the loop are not visible in this listing.
 */
335
static void *do_touch_pages(void *arg)
337
MemsetThread *memset_args = (MemsetThread *)arg;
338
sigset_t set, oldset;
/* Block here until the creator announces that all workers exist. */
346
qemu_mutex_lock(&page_mutex);
347
while (!memset_args->context->all_threads_created) {
348
qemu_cond_wait(&page_cond, &page_mutex);
350
qemu_mutex_unlock(&page_mutex);
/* Unblock SIGBUS for this thread, remembering the previous mask. */
354
sigaddset(&set, SIGBUS);
355
pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
/* Non-zero here means sigbus_handler() came back via siglongjmp(). */
357
if (sigsetjmp(memset_args->env, 1)) {
360
char *addr = memset_args->addr;
361
size_t numpages = memset_args->numpages;
362
size_t hpagesize = memset_args->hpagesize;
364
for (i = 0; i < numpages; i++) {
/*
 * Read a byte and store the same value back through a volatile lvalue:
 * the page gets written without changing its contents.
 */
373
*(volatile char *)addr = *addr;
/* Put back the signal mask saved above. */
377
pthread_sigmask(SIG_SETMASK, &oldset, NULL);
378
return (void *)(uintptr_t)ret;
/*
 * Thread entry point that populates its assigned range with a single
 * qemu_madvise(QEMU_MADV_POPULATE_WRITE) call.
 *
 * @arg is the MemsetThread describing the range; the byte length is
 * numpages * hpagesize. The result is 'ret' packed into the returned
 * pointer.
 *
 * NOTE(review): how 'ret' is set when the madvise call fails is not visible
 * in this listing.
 */
381
static void *do_madv_populate_write_pages(void *arg)
383
MemsetThread *memset_args = (MemsetThread *)arg;
384
const size_t size = memset_args->numpages * memset_args->hpagesize;
385
char * const addr = memset_args->addr;
/* Block here until the creator announces that all workers exist. */
389
qemu_mutex_lock(&page_mutex);
390
while (!memset_args->context->all_threads_created) {
391
qemu_cond_wait(&page_cond, &page_mutex);
393
qemu_mutex_unlock(&page_mutex);
/* A zero-length range skips the madvise call entirely. */
395
if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
398
return (void *)(uintptr_t)ret;
/*
 * Choose how many worker threads to use for @numpages pages of @hpagesize
 * bytes each.
 *
 * When the online processor count is positive, the starting value is the
 * smallest of that count, MAX_MEM_PREALLOC_THREAD_COUNT and 'max_threads'.
 * It is then capped by the page count and by one thread per 64 MiB of
 * memory, with a floor of one for that last bound.
 *
 * NOTE(review): the rest of the parameter list, the initial value of 'ret'
 * and the return statement are not visible in this listing.
 */
401
static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
404
long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
407
if (host_procs > 0) {
408
ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
/* Never more threads than pages. */
412
ret = MIN(ret, numpages);
/* At most one thread per 64 MiB, but allow at least one. */
414
ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
/*
 * Join every worker thread of @context, reading each thread's exit value
 * into 'tmp', and free the thread array.
 * NOTE(review): how the per-thread results are combined, whether @context
 * itself is freed and what is returned are not visible in this listing.
 */
420
static int wait_and_free_mem_prealloc_context(MemsetContext *context)
424
for (i = 0; i < context->num_threads; i++) {
425
tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread);
431
g_free(context->threads);
/*
 * Populate @numpages pages of @hpagesize bytes starting at @area, either
 * directly or by splitting the range across worker threads.
 *
 * @max_threads bounds the thread count, @tc is an optional ThreadContext
 * used to create the threads, @async asks for deferred completion and
 * @use_madv_populate_write selects the madvise-based worker over the
 * page-touching one.
 *
 * NOTE(review): many lines of this function (branch bodies, else arms,
 * declarations of 'ret', 'addr' and 'i', and the return statements) are not
 * visible in this listing; the comments below describe only what is shown.
 */
436
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
437
int max_threads, ThreadContext *tc, bool async,
438
bool use_madv_populate_write)
440
static gsize initialized = 0;
441
MemsetContext *context = g_malloc0(sizeof(MemsetContext));
442
size_t numpages_per_thread, leftover;
443
void *(*touch_fn)(void *);
/* NOTE(review): body not shown; presumably async is turned off here. */
451
if (!use_madv_populate_write || !tc) {
455
context->num_threads =
456
get_memset_num_threads(hpagesize, numpages, max_threads);
/* One-time initialisation of the shared mutex and condition variable. */
458
if (g_once_init_enter(&initialized)) {
459
qemu_mutex_init(&page_mutex);
460
qemu_cond_init(&page_cond);
461
g_once_init_leave(&initialized, 1);
464
if (use_madv_populate_write) {
/* Single-threaded, synchronous case: advise the whole range directly. */
469
if (context->num_threads == 1 && !async) {
471
if (qemu_madvise(area, hpagesize * numpages,
472
QEMU_MADV_POPULATE_WRITE)) {
478
touch_fn = do_madv_populate_write_pages;
480
touch_fn = do_touch_pages;
/*
 * Split the pages evenly; the first 'leftover' threads take one extra
 * page so that every page is assigned.
 */
483
context->threads = g_new0(MemsetThread, context->num_threads);
484
numpages_per_thread = numpages / context->num_threads;
485
leftover = numpages % context->num_threads;
486
for (i = 0; i < context->num_threads; i++) {
487
context->threads[i].addr = addr;
488
context->threads[i].numpages = numpages_per_thread + (i < leftover);
489
context->threads[i].hpagesize = hpagesize;
490
context->threads[i].context = context;
/*
 * Two creation paths are shown: through the ThreadContext, or a plain
 * qemu_thread_create(). NOTE(review): the selecting condition is not
 * visible; presumably it depends on whether @tc is set.
 */
492
thread_context_create_thread(tc, &context->threads[i].pgthread,
494
touch_fn, &context->threads[i],
495
QEMU_THREAD_JOINABLE);
497
qemu_thread_create(&context->threads[i].pgthread, "touch_pages",
498
touch_fn, &context->threads[i],
499
QEMU_THREAD_JOINABLE);
/* The next thread starts where this one's range ends. */
501
addr += context->threads[i].numpages * hpagesize;
/*
 * Queue the context on memset_contexts for later completion.
 * NOTE(review): presumably this is the async path; the guarding condition
 * is not visible.
 */
509
assert(bql_locked());
510
QLIST_INSERT_HEAD(&memset_contexts, context, next);
/* Let sigbus_handler() find these threads while pages are touched. */
514
if (!use_madv_populate_write) {
515
sigbus_memset_context = context;
/* Release the workers, which wait for all_threads_created. */
518
qemu_mutex_lock(&page_mutex);
519
context->all_threads_created = true;
520
qemu_cond_broadcast(&page_cond);
521
qemu_mutex_unlock(&page_mutex);
523
ret = wait_and_free_mem_prealloc_context(context);
525
if (!use_madv_populate_write) {
526
sigbus_memset_context = NULL;
/*
 * Complete every preallocation context queued on memset_contexts.
 *
 * The caller must hold the BQL (asserted). All queued contexts are marked
 * all_threads_created under page_mutex and the waiting workers are woken.
 * Each context is then unlinked and waited for. A failure is reported
 * through @errp.
 *
 * NOTE(review): the early return for an empty list, the combination of
 * per-context results into 'ret' and the returned value are not visible in
 * this listing.
 */
531
bool qemu_finish_async_prealloc_mem(Error **errp)
534
MemsetContext *context, *next_context;
537
assert(bql_locked());
538
if (QLIST_EMPTY(&memset_contexts)) {
542
qemu_mutex_lock(&page_mutex);
543
QLIST_FOREACH(context, &memset_contexts, next) {
544
context->all_threads_created = true;
546
qemu_cond_broadcast(&page_cond);
547
qemu_mutex_unlock(&page_mutex);
/* The _SAFE variant is needed because entries are removed while iterating. */
549
QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) {
550
QLIST_REMOVE(context, next);
551
tmp = wait_and_free_mem_prealloc_context(context);
/* 'ret' is passed negated, so it presumably holds a negative errno. */
558
error_setg_errno(errp, -ret,
559
"qemu_prealloc_mem: preallocating memory failed");
/*
 * Probe whether QEMU_MADV_POPULATE_WRITE can be used by advising @pagesize
 * bytes at @area; a successful madvise call means yes.
 * NOTE(review): the second operand of the '||' expression is not visible in
 * this listing, so there is an additional accepted case to confirm.
 */
565
static bool madv_populate_write_possible(char *area, size_t pagesize)
567
return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
/*
 * Preallocate @sz bytes of memory at @area.
 *
 * The page size is taken from @fd via qemu_fd_getpagesize() and @sz is
 * rounded up to whole pages. When QEMU_MADV_POPULATE_WRITE is not usable, a
 * SIGBUS handler is installed under sigbus_mutex for the duration of the
 * operation and the previous disposition is restored afterwards.
 * @max_threads, @tc and @async are forwarded to touch_all_pages(). Errors
 * are reported through @errp.
 *
 * NOTE(review): the conditions around the error reports, the preprocessor
 * selection between the two handler styles, and the return statements are
 * not visible in this listing.
 */
571
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
572
ThreadContext *tc, bool async, Error **errp)
574
static gsize initialized;
576
size_t hpagesize = qemu_fd_getpagesize(fd);
577
size_t numpages = DIV_ROUND_UP(sz, hpagesize);
578
bool use_madv_populate_write;
579
struct sigaction act;
586
use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
588
if (!use_madv_populate_write) {
/* One-time initialisation of sigbus_mutex. */
589
if (g_once_init_enter(&initialized)) {
590
qemu_mutex_init(&sigbus_mutex);
591
g_once_init_leave(&initialized, 1);
594
qemu_mutex_lock(&sigbus_mutex);
595
memset(&act, 0, sizeof(act));
/* Two handler styles are shown: SA_SIGINFO-style or plain sa_handler. */
597
act.sa_sigaction = &sigbus_handler;
598
act.sa_flags = SA_SIGINFO;
600
act.sa_handler = &sigbus_handler;
/* Install the handler and remember the previous one in sigbus_oldact. */
604
ret = sigaction(SIGBUS, &act, &sigbus_oldact);
606
qemu_mutex_unlock(&sigbus_mutex);
607
error_setg_errno(errp, errno,
608
"qemu_prealloc_mem: failed to install signal handler");
614
ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async,
615
use_madv_populate_write);
617
error_setg_errno(errp, -ret,
618
"qemu_prealloc_mem: preallocating memory failed");
/* Restore the SIGBUS disposition that was in place before. */
622
if (!use_madv_populate_write) {
623
ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
626
perror("qemu_prealloc_mem: failed to reinstall signal handler");
629
qemu_mutex_unlock(&sigbus_mutex);
/*
 * Look up a name for process @pid.
 *
 * On FreeBSD the name is a g_strdup() copy of the ki_comm field returned by
 * kinfo_getproc(). The other visible path reads the contents of
 * /proc/<pid>/cmdline into 'name' with g_file_get_contents(), ignoring its
 * error output.
 *
 * NOTE(review): NULL checks, cleanup of temporaries and the return statement
 * are not visible in this listing.
 */
634
char *qemu_get_pid_name(pid_t pid)
638
#if defined(__FreeBSD__)
640
struct kinfo_proc *proc = kinfo_getproc(pid);
643
name = g_strdup(proc->ki_comm);
651
pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
652
g_file_get_contents(pid_path, &name, &len, NULL);
/*
 * Allocate a stack with a guard page.
 *
 * @sz is in/out: where _SC_THREAD_STACK_MIN is available it is raised to at
 * least that minimum, and it is rounded up to a multiple of the host page
 * size. The memory is a private anonymous read/write mapping whose first
 * page (lowest address) is made inaccessible as a guard page. With
 * CONFIG_DEBUG_STACK_USAGE the rest is filled with the 0xdeadbeaf pattern.
 *
 * NOTE(review): extra mmap flags, the handling after the perror() calls and
 * the return statement are not visible in this listing.
 */
660
void *qemu_alloc_stack(size_t *sz)
664
#ifdef CONFIG_DEBUG_STACK_USAGE
667
size_t pagesz = qemu_real_host_page_size();
668
#ifdef _SC_THREAD_STACK_MIN
/* A negative sysconf() result is clamped to 0 before taking the maximum. */
670
long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
671
*sz = MAX(MAX(min_stack_sz, 0), *sz);
674
*sz = ROUND_UP(*sz, pagesz);
678
flags = MAP_PRIVATE | MAP_ANONYMOUS;
679
#if defined(MAP_STACK) && defined(__OpenBSD__)
689
ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
690
if (ptr == MAP_FAILED) {
691
perror("failed to allocate memory for stack");
/* Guard page: the lowest page of the mapping loses all access rights. */
696
if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
697
perror("failed to set up stack guard page");
/* Pattern-fill everything above the guard page for later usage checks. */
701
#ifdef CONFIG_DEBUG_STACK_USAGE
702
for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
703
*(uint32_t *)ptr2 = 0xdeadbeaf;
/*
 * Per-thread high-water mark of observed stack usage, only present when
 * CONFIG_DEBUG_STACK_USAGE is defined; updated by qemu_free_stack().
 */
710
#ifdef CONFIG_DEBUG_STACK_USAGE
711
static __thread unsigned int max_stack_usage;
/*
 * Release a stack of @sz bytes at @stack.
 *
 * With CONFIG_DEBUG_STACK_USAGE, the stack is scanned upwards from just
 * above the first page for the first word that no longer holds 0xdeadbeaf.
 * Usage is computed as @sz minus that offset, and a message is printed when
 * it exceeds this thread's previous maximum.
 *
 * NOTE(review): the loop exit and the actual unmapping of the stack are not
 * visible in this listing.
 */
714
void qemu_free_stack(void *stack, size_t sz)
716
#ifdef CONFIG_DEBUG_STACK_USAGE
720
for (ptr = stack + qemu_real_host_page_size(); ptr < stack + sz;
721
ptr += sizeof(uint32_t)) {
/* First overwritten word found. */
722
if (*(uint32_t *)ptr != 0xdeadbeaf) {
726
usage = sz - (uintptr_t) (ptr - stack);
727
if (usage > max_stack_usage) {
728
error_report("thread %d max stack usage increased from %u to %u",
729
qemu_get_thread_id(), max_stack_usage, usage);
730
max_stack_usage = usage;
/*
 * Call @action's sa_sigaction handler with a siginfo built from the
 * signalfd-style record @info.
 *
 * The signal number, errno and code are always copied. The remaining fields
 * depend on the origin: pid/uid for SI_USER, SI_QUEUE or any code <= 0; the
 * fault address for SIGILL, SIGFPE, SIGSEGV and SIGBUS; pid/status/uid for
 * SIGCHLD. The handler's third (context) argument is NULL.
 *
 * NOTE(review): the declaration and initialisation of 'si' are not visible
 * in this listing.
 */
747
void sigaction_invoke(struct sigaction *action,
748
struct qemu_signalfd_siginfo *info)
751
si.si_signo = info->ssi_signo;
752
si.si_errno = info->ssi_errno;
753
si.si_code = info->ssi_code;
762
if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
763
info->ssi_code <= 0) {
765
si.si_pid = info->ssi_pid;
766
si.si_uid = info->ssi_uid;
767
} else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
768
info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
769
si.si_addr = (void *)(uintptr_t)info->ssi_addr;
770
} else if (info->ssi_signo == SIGCHLD) {
771
si.si_pid = info->ssi_pid;
772
si.si_status = info->ssi_status;
773
si.si_uid = info->ssi_uid;
775
action->sa_sigaction(info->ssi_signo, &si, NULL);
/*
 * Return the amount of physical memory in bytes, computed as the
 * _SC_PHYS_PAGES page count times the host page size.
 *
 * NOTE(review): the guards around the computation (for example for a
 * negative sysconf() result) and the value returned when the product would
 * not fit in size_t are not visible in this listing.
 */
778
size_t qemu_get_host_physmem(void)
781
long pages = sysconf(_SC_PHYS_PAGES);
/* Overflow check: the multiplication below would exceed SIZE_MAX. */
783
if (pages > SIZE_MAX / qemu_real_host_page_size()) {
786
return pages * qemu_real_host_page_size();
/*
 * Synchronously flush the mapping covering [@addr, @addr + @length) with
 * msync(MS_SYNC), after widening the range to whole host pages.
 *
 * Returns the msync() result. @fd is not used in the lines visible here.
 */
793
int qemu_msync(void *addr, size_t length, int fd)
/* Mask that clears the offset-within-page bits. */
795
size_t align_mask = ~(qemu_real_host_page_size() - 1);
/* Grow the length by how far @addr sits into its page ... */
803
length += ((uintptr_t)addr & (qemu_real_host_page_size() - 1));
/* ... and round it up to a multiple of the page size. */
804
length = (length + ~align_mask) & align_mask;
/* Round the start address down to its page boundary. */
806
addr = (void *)((uintptr_t)addr & align_mask);
808
return msync(addr, length, MS_SYNC);
/*
 * Close open descriptors by enumerating the entries of /proc/self/fd,
 * leaving alone those listed in @skip (an array of @nskip descriptors).
 *
 * Entries whose name starts with '.' are ignored; other names are converted
 * to a descriptor number with atoi() and compared against the current
 * window [skip_start, skip_end) of @skip.
 *
 * NOTE(review): the failure handling for opendir(), the window adjustments,
 * the close() call itself and the returned value are not visible in this
 * listing. The early-exit and window logic presumably relies on @skip being
 * sorted -- confirm against the caller.
 */
811
static bool qemu_close_all_open_fd_proc(const int *skip, unsigned int nskip)
816
unsigned int skip_start = 0, skip_end = nskip;
818
dir = opendir("/proc/self/fd");
826
for (de = readdir(dir); de; de = readdir(dir)) {
827
bool close_fd = true;
829
if (de->d_name[0] == '.') {
832
fd = atoi(de->d_name);
837
for (unsigned int i = skip_start; i < skip_end; i++) {
841
} else if (fd == skip[i]) {
844
if (i == skip_start) {
846
} else if (i == skip_end) {
/*
 * Close descriptors in bulk with close_range(), one call per gap between
 * the descriptors listed in @skip. Only compiled in with CONFIG_CLOSE_RANGE.
 *
 * Each round first steps past descriptors that are to be skipped, then
 * closes up to just before the next skipped descriptor, capped at
 * open_max - 1. Rounds repeat until that cap is reached.
 *
 * NOTE(review): the rest of the parameter list, the initialisation and
 * update of 'first' and 'last', the handling of 'ret' and the returned value
 * are not visible in this listing. The stepping logic presumably relies on
 * @skip being sorted in ascending order -- confirm against the caller.
 */
862
static bool qemu_close_all_open_fd_close_range(const int *skip,
866
#ifdef CONFIG_CLOSE_RANGE
867
int max_fd = open_max - 1;
869
unsigned int cur_skip = 0;
/* Step over descriptors that must stay open. */
874
while (cur_skip < nskip && first == skip[cur_skip]) {
/* Stop just before the next descriptor to keep, within the valid range. */
881
if (cur_skip < nskip) {
882
last = skip[cur_skip] - 1;
883
last = MIN(last, max_fd);
891
ret = close_range(first, last, 0);
897
} while (last < max_fd);
/*
 * Last-resort path: walk every descriptor number from 0 up to, but not
 * including, open_max and compare it with the current entry of @skip.
 *
 * NOTE(review): the rest of the parameter list and both branch bodies
 * (advancing 'cur_skip', closing the descriptor) are not visible in this
 * listing. The single forward cursor presumably relies on @skip being sorted
 * in ascending order -- confirm against the caller.
 */
905
static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip,
908
unsigned int cur_skip = 0;
911
for (int i = 0; i < open_max; i++) {
912
if (cur_skip < nskip && i == skip[cur_skip]) {
/*
 * Close all open descriptors except the @nskip entries of @skip.
 *
 * @skip may be NULL only when @nskip is 0 (asserted). Strategies are tried
 * in order: close_range(), then the /proc/self/fd walk, and only if both
 * report false the loop over every possible descriptor up to the
 * _SC_OPEN_MAX limit.
 */
923
void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
925
int open_max = sysconf(_SC_OPEN_MAX);
927
assert(skip != NULL || nskip == 0);
/* '&&' short-circuits, so a later strategy runs only if the earlier failed. */
929
if (!qemu_close_all_open_fd_close_range(skip, nskip, open_max) &&
930
!qemu_close_all_open_fd_proc(skip, nskip)) {
931
qemu_close_all_open_fd_fallback(skip, nskip, open_max);