/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qemu/mprotect.h"
#include "qemu/memalign.h"
#include "qemu/cacheinfo.h"
#include "qemu/qtree.h"
#include "qapi/error.h"
#include "tcg/tcg.h"
#include "exec/translation-block.h"
#include "tcg-internal.h"
#include "host/cpuinfo.h"


/*
 * Local source-level compatibility with Unix.
 * Used by tcg_region_init below.
 */
#if defined(_WIN32)
#define PROT_READ 1
#define PROT_WRITE 2
#define PROT_EXEC 4
#endif
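/*
 * Note (an observation, not in the original): these match the usual POSIX
 * bit values for PROT_*, so the protection arithmetic below combines and
 * compares them identically on Windows.
 */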

struct tcg_region_tree {
    QemuMutex lock;
    QTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;
    size_t size;       /* size of one region */
    size_t stride;     /* .size + guard size */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
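
/*
 * Layout sketch (sizes hypothetical): with a 64-byte cache line and a
 * 48-byte struct, tree_size rounds up to 64, so the tree for region i
 * lives at region_trees + i * 64 and no two trees share a cache line.
 */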

bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the
     * end of an array (so long as you don't dereference it), allow
     * a pointer to the byte past the end of the code gen buffer.
     * Note the unsigned comparison: a pointer below start_aligned
     * wraps to a huge size_t value, so one compare checks both bounds.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

#ifndef CONFIG_TCG_INTERPRETER
static int host_prot_read_exec(void)
{
#if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI)
    if (cpuinfo & CPUINFO_BTI) {
        return PROT_READ | PROT_EXEC | PROT_BTI;
    }
#endif
    return PROT_READ | PROT_EXEC;
}
#endif

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
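
/*
 * Example (addresses hypothetical): if the writable buffer is mapped at
 * 0x7f0000000000 and its executable alias at 0x7f8000000000, then
 * tcg_splitwx_diff == 0x8000000000; to_rx adds it, to_rw subtracts it.
 */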

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the two .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;
    qemu_spin_destroy(&tb->jmp_lock);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
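
/*
 * Worked example (numbers hypothetical): with stride = 16 MiB and n = 64,
 * a pointer 40 MiB past start_aligned yields region_idx = 2, while any
 * offset beyond stride * 63 clamps to the final region.
 */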

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = q_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        q_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += q_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        q_tree_ref(rt->tree);
        q_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size. If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     */
    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}
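
/*
 * Worked example (numbers hypothetical): tb_size = 1 GiB and max_cpus = 8
 * give n_regions = 512 candidate 2 MiB regions; since 512 > 8 we cap at
 * max_cpus * 8 and return 64.
 */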

/*
 * Minimum size of the code gen buffer. This number is randomly chosen,
 * but not so small that we can't have a fair number of TB's live.
 *
 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 * Unless otherwise indicated, this is constrained by the range of
 * direct branches on the host cpu, as used by the TCG implementation
 * of goto_tb.
 */
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)

#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator, don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
    (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
     ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        /* Return -1, not false: callers treat any value >= 0 as success. */
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE | PROT_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"
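
/*
 * Split-wx via memfd: the same pages are mapped twice, once writable
 * (buf_rw, returned to the caller) and once executable (buf_rx), so code
 * written through buf_rw becomes executable at buf_rw + tcg_splitwx_diff.
 */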
static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "failed to map shared memory for execute");
        goto fail;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail:
    /* buf_rx is always equal to MAP_FAILED here and does not require cleanup */
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);

static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /*
     * Map the read-write portion via normal anon memory.
     * Note: alloc_code_gen_buffer_anon returns -1 on error (and a
     * non-negative prot on success), so test for that explicitly.
     */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, host_prot_read_exec()) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */

static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later. We can go the other way with the same number
     * of syscalls, so always begin with PROT_NONE.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_DARWIN
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In system-mode the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in system-mode.
 */
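
/*
 * Example partition (numbers hypothetical): a 1 GiB buffer split into 64
 * regions gives a 16 MiB stride; with 4 KiB pages, the last page of each
 * stride is a guard page, leaving 16 MiB - 4 KiB of code space per region.
 */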
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size();
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, via the prologue,
     * which has yet to be allocated. For now, the first region begins at
     * the page boundary.
     */
    region.after_prologue = region.start_aligned;

    /* init the region struct */
    qemu_mutex_init(&region.lock);

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     * Work with the page protections set up with the initial mapping.
     */
    need_prot = PROT_READ | PROT_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
    if (tcg_splitwx_diff == 0) {
        need_prot |= host_prot_read_exec();
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PROT_READ | PROT_WRITE | PROT_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PROT_READ | PROT_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
#ifdef CONFIG_POSIX
                rc = mprotect(start, end - start, need_prot);
#else
                g_assert_not_reached();
#endif
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}

void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.total_size;
    capacity -= (region.n - 1) * guard_size;
    capacity -= region.n * TCG_HIGHWATER;

    return capacity;
}
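
/*
 * Worked example (numbers hypothetical): with 64 regions, 4 KiB guard
 * pages, and region.total_size already excluding the final guard page,
 * capacity subtracts the remaining 63 guard pages plus 64 * TCG_HIGHWATER
 * bytes of per-region highwater slack.
 */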