/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "exec/page-protection.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
#include "qemu/interval-tree.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
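
/*
 * Usage sketch (illustrative; the actual caller lives elsewhere in
 * linux-user): the two hooks above are meant to bracket fork() so the
 * child never starts life with mmap_mutex held by a thread that no
 * longer exists in it:
 *
 *     mmap_fork_start();
 *     pid = fork();
 *     mmap_fork_end(pid == 0);
 */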

/* Protected by mmap_lock. */
static IntervalTreeRoot shm_regions;

static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}

static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}

static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_RWX) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
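
/*
 * Illustrative example (assumes the usual Linux encoding in which
 * PROT_READ/WRITE/EXEC coincide numerically with PAGE_READ/WRITE/EXEC):
 *
 *     validate_prot_to_pageflags(PROT_READ | PROT_WRITE)
 *         -> PAGE_READ | PAGE_WRITE | PAGE_VALID
 *     validate_prot_to_pageflags(PROT_READ | 0x40000000)
 *         -> 0, since the unknown bit is outside 'valid'
 */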

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
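
/*
 * For example (illustrative):
 *
 *     target_to_host_prot(PROT_EXEC)
 *         -> PROT_READ
 *     target_to_host_prot(PROT_READ | PROT_WRITE | PROT_EXEC)
 *         -> PROT_READ | PROT_WRITE
 *
 * The translator reads the guest code; the host never executes it
 * directly.
 */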

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & -host_page_size;
    host_last = ROUND_UP(last, host_page_size) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
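
/*
 * Worked example for the splitting above (illustrative numbers): with
 * an 8 KiB host page and a 4 KiB target page, target_mprotect(0x1000,
 * 0x4000, prot) covers host pages [0x0000, 0x5fff].  The head host
 * page also holds guest page [0x0000, 0x0fff] and the tail host page
 * also holds [0x5000, 0x5fff]; each of those host pages receives
 * 'prot' OR'ed with the flags of its outside guest page whenever that
 * sum differs, while the middle host page [0x2000, 0x3fff] receives
 * exactly 'prot'.
 */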

/*
 * Perform munmap on behalf of the target, with host parameters.
 * If reserved_va, we must replace the memory reservation.
 */
static int do_munmap(void *addr, size_t len)
{
    if (reserved_va) {
        void *ptr = mmap(addr, len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        return ptr == addr ? 0 : -1;
    }
    return munmap(addr, len);
}

/*
 * Perform a pread on behalf of target_mmap.  We can reach EOF, we can be
 * interrupted by signals, and in general there's no good error return path.
 * If @zero, zero the rest of the block at EOF.
 * Return true on success.
 */
static bool mmap_pread(int fd, void *p, size_t len, off_t offset, bool zero)
{
    while (1) {
        ssize_t r = pread(fd, p, len, offset);

        if (likely(r == len)) {
            /* Complete */
            return true;
        }
        if (r == 0) {
            /* EOF */
            if (zero) {
                memset(p, 0, len);
            }
            return true;
        }
        if (r > 0) {
            /* Short read */
            p += r;
            len -= r;
            offset += r;
        } else if (errno != EINTR) {
            /* Error */
            return false;
        }
    }
}

/*
 * Map an incomplete host page.
 *
 * Here be dragons.  This case will not work if there is an existing
 * overlapping host page, which is file mapped, and for which the mapping
 * is beyond the end of the file.  In that case, we will see SIGBUS when
 * trying to write a portion of this page.
 *
 * FIXME: Work around this with a temporary signal handler and longjmp.
 */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                do_munmap(p, host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else if (!mmap_pread(fd, g2h_untagged(start), last - start + 1,
                           offset, true)) {
        return false;
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, host_page_size, host_prot_new);
    }
    return true;
}
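
/*
 * Worked example (illustrative numbers): host pages are 8 KiB, target
 * pages are 4 KiB, and the guest file-maps the fragment [0x1000,
 * 0x1fff] within host page [0x0000, 0x1fff].  If guest page [0x0000,
 * 0x0fff] is unmapped, prot_old == 0: the whole host page is replaced
 * by a fresh anonymous page and the file contents are read into the
 * fragment.  If it is mapped, the existing host page is kept,
 * temporarily made writable if need be, and only the fragment is read
 * over it.
 */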

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    int host_page_size = qemu_real_host_page_size();
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= -host_page_size;
    }
    start = ROUND_UP(start, align);
    size = ROUND_UP(size, host_page_size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/*
 * Record a successful mmap within the user-exec interval tree.
 */
static abi_long mmap_end(abi_ulong start, abi_ulong last,
                         abi_ulong passthrough_start,
                         abi_ulong passthrough_last,
                         int flags, int page_flags)
{
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
    shm_region_rm_complete(start, last);
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    return start;
}

/*
 * Special case host page size == target page size,
 * where there are no edge conditions.
 */
static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
                            int host_prot, int flags, int page_flags,
                            int fd, off_t offset)
{
    void *p, *want_p = NULL;
    abi_ulong last;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    p = mmap(want_p, len, host_prot, flags, fd, offset);
    if (p == MAP_FAILED) {
        return -1;
    }
    /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */
    if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) {
        do_munmap(p, len);
        errno = EEXIST;
        return -1;
    }

    start = h2g(p);
    last = start + len - 1;
    return mmap_end(start, last, start, last, flags, page_flags);
}
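
/*
 * Background note (not from this file): MAP_FIXED_NOREPLACE was only
 * added in Linux 4.17; older kernels ignore the unknown flag and treat
 * the address as a mere hint, possibly returning some other address.
 * The p != want_p check above converts that into the EEXIST failure
 * the guest expects.
 */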

/*
 * Special case host page size < target page size.
 *
 * The two special cases are increased guest alignment, and mapping
 * past the end of a file.
 *
 * When mapping files into a memory area larger than the file,
 * accesses to pages beyond the file size will cause a SIGBUS.
 *
 * For example, if mmapping a file of 100 bytes on a host with 4K
 * pages emulating a target with 8K pages, the target expects to
 * be able to access the first 8K.  But the host will trap us on
 * any access beyond 4K.
 *
 * When emulating a target with a larger page-size than the host's,
 * we may need to truncate file maps at EOF and add extra anonymous
 * pages up to the target's page boundary.
 *
 * This workaround only works for files that do not change.
 * If the file is later extended (e.g. ftruncate), the SIGBUS
 * vanishes and the proper behaviour is that changes within the
 * anon page should be reflected in the file.
 *
 * However, this case is rather common with executable images,
 * so the workaround is important for even trivial tests, whereas
 * the mmap of a file being extended is less common.
 */
static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
                            int mmap_flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
{
    void *p, *want_p = NULL;
    off_t fileend_adj = 0;
    int flags = mmap_flags;
    abi_ulong last, pass_last;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    if (!(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            return -1;
        }
        if (offset >= sb.st_size) {
            /*
             * The entire map is beyond the end of the file.
             * Transform it to an anonymous mapping.
             */
            flags |= MAP_ANONYMOUS;
            fd = -1;
            offset = 0;
        } else if (offset + len > sb.st_size) {
            /*
             * A portion of the map is beyond the end of the file.
             * Truncate the file portion of the allocation.
             */
            fileend_adj = offset + len - sb.st_size;
        }
    }

    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
        if (fileend_adj) {
            p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
        } else {
            p = mmap(want_p, len, host_prot, flags, fd, offset);
        }
        if (p != want_p) {
            if (p != MAP_FAILED) {
                /* Host does not support MAP_FIXED_NOREPLACE: emulate. */
                do_munmap(p, len);
                errno = EEXIST;
            }
            return -1;
        }

        if (fileend_adj) {
            void *t = mmap(p, len - fileend_adj, host_prot,
                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
                           fd, offset);

            if (t == MAP_FAILED) {
                int save_errno = errno;

                /*
                 * We failed a map over the top of the successful anonymous
                 * mapping above. The only failure mode is running out of VMAs,
                 * and there's nothing that we can do to detect that earlier.
                 * If we have replaced an existing mapping with MAP_FIXED,
                 * then we cannot properly recover.  It's a coin toss whether
                 * it would be better to exit or continue here.
                 */
                if (!(flags & MAP_FIXED_NOREPLACE) &&
                    !page_check_range_empty(start, start + len - 1)) {
                    qemu_log("QEMU target_mmap late failure: %s",
                             strerror(save_errno));
                }

                do_munmap(want_p, len);
                errno = save_errno;
                return -1;
            }
        }
    } else {
        size_t host_len, part_len;

        /*
         * Take care to align the host memory.  Perform a larger anonymous
         * allocation and extract the aligned portion.  Remap the file on
         * top of that.
         */
        host_len = len + TARGET_PAGE_SIZE - host_page_size;
        p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }

        part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
        if (part_len) {
            part_len = TARGET_PAGE_SIZE - part_len;
            do_munmap(p, part_len);
            p += part_len;
            host_len -= part_len;
        }
        if (len < host_len) {
            do_munmap(p + len, host_len - len);
        }

        if (!(flags & MAP_ANONYMOUS)) {
            void *t = mmap(p, len - fileend_adj, host_prot,
                           flags | MAP_FIXED, fd, offset);

            if (t == MAP_FAILED) {
                int save_errno = errno;
                do_munmap(p, len);
                errno = save_errno;
                return -1;
            }
        }

        start = h2g(p);
    }

    last = start + len - 1;
    if (fileend_adj) {
        pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
    } else {
        pass_last = last;
    }
    return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
}
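
/*
 * Worked example of the alignment dance in the non-FIXED path above
 * (illustrative numbers): with 4 KiB host pages and 16 KiB target
 * pages, a 16 KiB request over-allocates host_len = 16K + 16K - 4K =
 * 28 KiB anonymously, trims the head up to the next 16 KiB boundary
 * and any tail beyond len, and finally maps the file MAP_FIXED over
 * the surviving, now target-aligned, window.
 */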

/*
 * Special case host page size > target page size.
 *
 * The two special cases are address and file offsets that are valid
 * for the guest that cannot be directly represented by the host.
 */
static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len,
                            int target_prot, int host_prot,
                            int flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
{
    void *p, *want_p = NULL;
    off_t host_offset = offset & -host_page_size;
    abi_ulong last, real_start, real_last;
    bool misaligned_offset = false;
    size_t host_len;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        /*
         * Adjust the offset to something representable on the host.
         */
        host_len = len + offset - host_offset;
        p = mmap(want_p, host_len, host_prot, flags, fd, host_offset);
        if (p == MAP_FAILED) {
            return -1;
        }

        /* Update start to the file position at offset. */
        p += offset - host_offset;

        start = h2g(p);
        last = start + len - 1;
        return mmap_end(start, last, start, last, flags, page_flags);
    }

    if (!(flags & MAP_ANONYMOUS)) {
        misaligned_offset = (start ^ offset) & (host_page_size - 1);

        /*
         * The fallback for misalignment is a private mapping + read.
         * This carries none of the semantics required of MAP_SHARED.
         */
        if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) {
            errno = EINVAL;
            return -1;
        }
    }

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * Handle the start and end of the mapping.
     */
    if (real_start < start) {
        abi_ulong real_page_last = real_start + host_page_size - 1;
        if (last <= real_page_last) {
            /* Entire allocation a subset of one host page. */
            if (!mmap_frag(real_start, start, last, target_prot,
                           flags, fd, offset)) {
                return -1;
            }
            return mmap_end(start, last, -1, 0, flags, page_flags);
        }

        if (!mmap_frag(real_start, start, real_page_last, target_prot,
                       flags, fd, offset)) {
            return -1;
        }
        real_start = real_page_last + 1;
    }

    if (last < real_last) {
        abi_ulong real_page_start = real_last - host_page_size + 1;
        if (!mmap_frag(real_page_start, real_page_start, last,
                       target_prot, flags, fd,
                       offset + real_page_start - start)) {
            return -1;
        }
        real_last = real_page_start - 1;
    }

    if (real_start > real_last) {
        return mmap_end(start, last, -1, 0, flags, page_flags);
    }

    /*
     * Handle the middle of the mapping.
     */

    host_len = real_last - real_start + 1;
    want_p += real_start - start;

    if (flags & MAP_ANONYMOUS) {
        p = mmap(want_p, host_len, host_prot, flags, -1, 0);
    } else if (!misaligned_offset) {
        p = mmap(want_p, host_len, host_prot, flags, fd,
                 offset + real_start - start);
    } else {
        p = mmap(want_p, host_len, host_prot | PROT_WRITE,
                 flags | MAP_ANONYMOUS, -1, 0);
    }
    if (p != want_p) {
        if (p != MAP_FAILED) {
            do_munmap(p, host_len);
            errno = EEXIST;
        }
        return -1;
    }

    if (misaligned_offset) {
        if (!mmap_pread(fd, p, host_len, offset + real_start - start, false)) {
            do_munmap(p, host_len);
            return -1;
        }
        if (!(host_prot & PROT_WRITE)) {
            mprotect(p, host_len, host_prot);
        }
    }

    return mmap_end(start, last, -1, 0, flags, page_flags);
}
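
/*
 * Worked example for the misaligned-offset fallback (illustrative
 * numbers): with 64 KiB host pages and 4 KiB target pages, a guest
 * MAP_PRIVATE file mapping at start 0x14000 with offset 0x2000 is
 * page-aligned for the guest, but (0x14000 ^ 0x2000) & 0xffff != 0,
 * so no host mmap offset can produce it.  The middle is therefore
 * mapped anonymously and filled in with mmap_pread(); MAP_SHARED
 * requests of this shape are rejected with EINVAL above.
 */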

static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
                                    int target_prot, int flags, int page_flags,
                                    int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    int host_prot;

    /*
     * For reserved_va, we are in full control of the allocation.
     * Find a suitable hole and convert to MAP_FIXED.
     */
    if (reserved_va) {
        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, start + len - 1)) {
                errno = EEXIST;
                return -1;
            }
            flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
        } else if (!(flags & MAP_FIXED)) {
            abi_ulong real_start = start & -host_page_size;
            off_t host_offset = offset & -host_page_size;
            size_t real_len = len + offset - host_offset;
            abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE);

            start = mmap_find_vma(real_start, real_len, align);
            if (start == (abi_ulong)-1) {
                errno = ENOMEM;
                return -1;
            }
            start += offset - host_offset;
            flags |= MAP_FIXED;
        }
    }

    host_prot = target_to_host_prot(target_prot);

    if (host_page_size == TARGET_PAGE_SIZE) {
        return mmap_h_eq_g(start, len, host_prot, flags,
                           page_flags, fd, offset);
    } else if (host_page_size < TARGET_PAGE_SIZE) {
        return mmap_h_lt_g(start, len, host_prot, flags,
                           page_flags, fd, offset, host_page_size);
    } else {
        return mmap_h_gt_g(start, len, target_prot, host_prot, flags,
                           page_flags, fd, offset, host_page_size);
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_long ret;
    int page_flags;

    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        return -1;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        return -1;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len || len != (size_t)len) {
        errno = ENOMEM;
        return -1;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            return -1;
        }
        if (!guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            return -1;
        }
    }

    mmap_lock();

    ret = target_mmap__locked(start, len, target_prot, flags,
                              page_flags, fd, offset);

    mmap_unlock();

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) {
        CPUState *cpu = thread_cpu;
        if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
            tcg_cflags_set(cpu, CF_PARALLEL);
            tb_flush(cpu);
        }
    }

    return ret;
}

static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return 0;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= host_page_size;
        }

        if (real_last < real_start) {
            return 0;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    return do_munmap(host_start, real_len);
}
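
/*
 * Worked example (illustrative numbers): with 8 KiB host pages and
 * 4 KiB target pages, unmapping guest range [0x1000, 0x2fff] while
 * guest page [0x0000, 0x0fff] is still mapped and [0x3000, 0x3fff] is
 * free: real_start advances from 0x0000 to 0x2000 to preserve the
 * neighbour, and only host page [0x2000, 0x3fff] is released (or
 * re-reserved, under reserved_va).
 */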

int target_munmap(abi_ulong start, abi_ulong len)
{
    int ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        errno = EINVAL;
        return -1;
    }

    mmap_lock();
    ret = mmap_reserve_or_unmap(start, len);
    if (likely(ret == 0)) {
        page_set_flags(start, start + len - 1, 0);
        shm_region_rm_complete(start, start + len - 1);
    }
    mmap_unlock();

    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int page_flags = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                page_flags |= page_get_flags(addr);
            }
        }
        if (page_flags == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite
     * complicated though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
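
/*
 * Example of the distinction drawn above (illustrative): MADV_FREE is
 * a pure hint, so it falls through the switch and reports success
 * without touching the host.  MADV_WIPEONFORK changes semantics, so on
 * a range that is not entirely PAGE_PASSTHROUGH it fails with -EINVAL
 * rather than silently providing the wrong fork() behaviour.
 */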

#ifndef TARGET_FORCE_SHMLBA
/*
 * For most architectures, SHMLBA is the same as the page size;
 * some architectures have larger values, in which case they should
 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
 * and defining its own value for SHMLBA.
 *
 * The kernel also permits SHMLBA to be set by the architecture to a
 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
 * this means that addresses are rounded to the large size if
 * SHM_RND is set but addresses not aligned to that size are not rejected
 * as long as they are at least page-aligned.  Since the only architecture
 * which uses this is ia64 this code doesn't provide for that oddity.
 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
    return TARGET_PAGE_SIZE;
}
#endif
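
/*
 * Example (background assumption, not from this file): 32-bit Arm
 * kernels define __ARCH_FORCE_SHMLBA with SHMLBA = 4 * PAGE_SIZE to
 * avoid cache aliasing, so the arm target defines TARGET_FORCE_SHMLBA
 * and its target_shmlba() returns 16 KiB rather than the 4 KiB
 * TARGET_PAGE_SIZE default above.
 */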

#if defined(__arm__) || defined(__mips__) || defined(__sparc__)
#define HOST_FORCE_SHMLBA 1
#else
#define HOST_FORCE_SHMLBA 0
#endif

abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
{
    CPUState *cpu = env_cpu(cpu_env);
    struct shmid_ds shm_info;
    int ret;
    int h_pagesize;
    int t_shmlba, h_shmlba, m_shmlba;
    size_t t_len, h_len, m_len;

    /* shmat pointers are always untagged */

    /*
     * Because we can't use host shmat() unless the address is sufficiently
     * aligned for the host, we'll need to check both.
     * TODO: Could be fixed with softmmu.
     */
    t_shmlba = target_shmlba(cpu_env);
    h_pagesize = qemu_real_host_page_size();
    h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize);
    m_shmlba = MAX(t_shmlba, h_shmlba);

    if (shmaddr) {
        if (shmaddr & (m_shmlba - 1)) {
            if (shmflg & SHM_RND) {
                /*
                 * The guest is allowing the kernel to round the address.
                 * Assume that the guest is ok with us rounding to the
                 * host required alignment too.  Anyway if we don't, we'll
                 * get an error from the kernel.
                 */
                shmaddr &= ~(m_shmlba - 1);
                if (shmaddr == 0 && (shmflg & SHM_REMAP)) {
                    return -TARGET_EINVAL;
                }
            } else {
                int require = TARGET_PAGE_SIZE;
#ifdef TARGET_FORCE_SHMLBA
                require = t_shmlba;
#endif
                /*
                 * Include host required alignment, as otherwise we cannot
                 * use host shmat at all.
                 */
                require = MAX(require, h_shmlba);
                if (shmaddr & (require - 1)) {
                    return -TARGET_EINVAL;
                }
            }
        }
    } else {
        if (shmflg & SHM_REMAP) {
            return -TARGET_EINVAL;
        }
    }
    /* All rounding now manually concluded. */
    shmflg &= ~SHM_RND;

    /* Find out the length of the shared memory segment. */
    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
    if (is_error(ret)) {
        /* can't get length, bail out */
        return ret;
    }
    t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz);
    h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize);
    m_len = MAX(t_len, h_len);

    if (!guest_range_valid_untagged(shmaddr, m_len)) {
        return -TARGET_EINVAL;
    }

    WITH_MMAP_LOCK_GUARD() {
        bool mapped = false;
        void *want, *test;
        abi_ulong last;

        if (!shmaddr) {
            shmaddr = mmap_find_vma(0, m_len, m_shmlba);
            if (shmaddr == -1) {
                return -TARGET_ENOMEM;
            }
            mapped = !reserved_va;
        } else if (shmflg & SHM_REMAP) {
            /*
             * If host page size > target page size, the host shmat may map
             * more memory than the guest expects.  Reject a mapping that
             * would replace memory in the unexpected gap.
             * TODO: Could be fixed with softmmu.
             */
            if (t_len < h_len &&
                !page_check_range_empty(shmaddr + t_len,
                                        shmaddr + h_len - 1)) {
                return -TARGET_EINVAL;
            }
        } else {
            if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) {
                return -TARGET_EINVAL;
            }
        }

        /* All placement is now complete. */
        want = (void *)g2h_untagged(shmaddr);

        /*
         * Map anonymous pages across the entire range, then remap with
         * the shared memory.  This is required for a number of corner
         * cases for which host and guest page sizes differ.
         */
        if (h_len != t_len) {
            int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE);
            int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS
                       | (reserved_va || mapped || (shmflg & SHM_REMAP)
                          ? MAP_FIXED : MAP_FIXED_NOREPLACE);

            test = mmap(want, m_len, mmap_p, mmap_f, -1, 0);
            if (unlikely(test != want)) {
                /* shmat returns EINVAL not EEXIST like mmap. */
                ret = (test == MAP_FAILED && errno != EEXIST
                       ? get_errno(-1) : -TARGET_EINVAL);
                if (mapped) {
                    do_munmap(want, m_len);
                }
                return ret;
            }
            mapped = true;
        }

        if (reserved_va || mapped) {
            shmflg |= SHM_REMAP;
        }
        test = shmat(shmid, want, shmflg);
        if (test == MAP_FAILED) {
            ret = get_errno(-1);
            if (mapped) {
                do_munmap(want, m_len);
            }
            return ret;
        }
        assert(test == want);

        last = shmaddr + m_len - 1;
        page_set_flags(shmaddr, last,
                       PAGE_VALID | PAGE_RESET | PAGE_READ |
                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) |
                       (shmflg & SHM_EXEC ? PAGE_EXEC : 0));

        shm_region_rm_complete(shmaddr, last);
        shm_region_add(shmaddr, last);
    }

    /*
     * We're mapping shared memory, so ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
        tcg_cflags_set(cpu, CF_PARALLEL);
        tb_flush(cpu);
    }

    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following shmat\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    return shmaddr;
}
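
/*
 * Gap example for the SHM_REMAP check above (illustrative numbers): a
 * 20 KiB segment gives t_len = 20 KiB on a 4 KiB-page guest but
 * h_len = 64 KiB on a 64 KiB-page host.  Host shmat would also clobber
 * [shmaddr + 20K, shmaddr + 64K), so the attach is refused unless that
 * gap is empty of guest mappings.
 */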

abi_long target_shmdt(abi_ulong shmaddr)
{
    abi_long rv;

    /* shmdt pointers are always untagged */

    WITH_MMAP_LOCK_GUARD() {
        abi_ulong last = shm_region_find(shmaddr);
        if (last == 0) {
            return -TARGET_EINVAL;
        }

        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
        if (rv == 0) {
            abi_ulong size = last - shmaddr + 1;

            page_set_flags(shmaddr, last, 0);
            shm_region_rm_complete(shmaddr, last);
            mmap_reserve_or_unmap(shmaddr, size);
        }
    }
    return rv;
}