xref: /qemu/linux-user/mmap.c (revision fc524567087c2537b5103cdfc1d41e4f442892b6)
1 /*
2  *  mmap support for qemu
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include <sys/shm.h>
21 #include "trace.h"
22 #include "exec/log.h"
23 #include "exec/page-protection.h"
24 #include "exec/mmap-lock.h"
25 #include "exec/tb-flush.h"
26 #include "exec/translation-block.h"
27 #include "qemu.h"
28 #include "user/page-protection.h"
29 #include "user-internals.h"
30 #include "user-mmap.h"
31 #include "target_mman.h"
32 #include "qemu/interval-tree.h"
33 
34 #ifdef TARGET_ARM
35 #include "target/arm/cpu-features.h"
36 #endif
37 
38 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
39 static __thread int mmap_lock_count;
40 
41 void mmap_lock(void)
42 {
43     if (mmap_lock_count++ == 0) {
44         pthread_mutex_lock(&mmap_mutex);
45     }
46 }
47 
48 void mmap_unlock(void)
49 {
50     assert(mmap_lock_count > 0);
51     if (--mmap_lock_count == 0) {
52         pthread_mutex_unlock(&mmap_mutex);
53     }
54 }
55 
56 bool have_mmap_lock(void)
57 {
58     return mmap_lock_count > 0;
59 }
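/*
 * Illustrative note (editorial, not in the upstream source): the functions
 * above implement a recursion-counted lock, so a thread that already holds
 * the mmap lock may take it again without deadlocking.  For example:
 *
 *     mmap_lock();      // count 0 -> 1, mutex acquired
 *     mmap_lock();      // count 1 -> 2, no mutex operation
 *     mmap_unlock();    // count 2 -> 1, mutex still held
 *     mmap_unlock();    // count 1 -> 0, mutex released
 */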
60 
61 /* Grab lock to make sure things are in a consistent state after fork().  */
62 void mmap_fork_start(void)
63 {
64     if (mmap_lock_count)
65         abort();
66     pthread_mutex_lock(&mmap_mutex);
67 }
68 
69 void mmap_fork_end(int child)
70 {
71     if (child) {
72         pthread_mutex_init(&mmap_mutex, NULL);
73     } else {
74         pthread_mutex_unlock(&mmap_mutex);
75     }
76 }
77 
78 /* Protected by mmap_lock. */
79 static IntervalTreeRoot shm_regions;
80 
81 static void shm_region_add(abi_ptr start, abi_ptr last)
82 {
83     IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
84 
85     i->start = start;
86     i->last = last;
87     interval_tree_insert(i, &shm_regions);
88 }
89 
90 static abi_ptr shm_region_find(abi_ptr start)
91 {
92     IntervalTreeNode *i;
93 
94     for (i = interval_tree_iter_first(&shm_regions, start, start); i;
95          i = interval_tree_iter_next(i, start, start)) {
96         if (i->start == start) {
97             return i->last;
98         }
99     }
100     return 0;
101 }
102 
103 static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
104 {
105     IntervalTreeNode *i, *n;
106 
107     for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
108         n = interval_tree_iter_next(i, start, last);
109         if (i->start >= start && i->last <= last) {
110             interval_tree_remove(i, &shm_regions);
111             g_free(i);
112         }
113     }
114 }
115 
116 /*
117  * Validate target prot bitmask.
118  * Return 0 if the target prot bitmask is invalid, otherwise the
119  * internal qemu page_flags (which will include PAGE_VALID).
120  * The host prot bitmask is derived separately via target_to_host_prot().
121  */
122 static int validate_prot_to_pageflags(int prot)
123 {
124     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
125     int page_flags = (prot & PAGE_RWX) | PAGE_VALID;
126 
127 #ifdef TARGET_AARCH64
128     {
129         ARMCPU *cpu = ARM_CPU(thread_cpu);
130 
131         /*
132          * The PROT_BTI bit is only accepted if the cpu supports the feature.
133          * Since this is the unusual case, don't bother checking unless
134          * the bit has been requested.  If set and valid, record the bit
135          * within QEMU's page_flags.
136          */
137         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
138             valid |= TARGET_PROT_BTI;
139             page_flags |= PAGE_BTI;
140         }
141         /* Similarly for the PROT_MTE bit. */
142         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
143             valid |= TARGET_PROT_MTE;
144             page_flags |= PAGE_MTE;
145         }
146     }
147 #elif defined(TARGET_HPPA)
148     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
149 #endif
150 
151     return prot & ~valid ? 0 : page_flags;
152 }
153 
154 /*
155  * For the host, we need not pass anything except read/write/exec.
156  * While PROT_SEM is allowed by all hosts, it is also ignored, so
157  * don't bother transforming the guest bit to the host bit.  Any other
158  * target-specific prot bits will not be understood by the host
159  * and will need to be encoded into page_flags for qemu emulation.
160  *
161  * Pages that are executable by the guest will never be executed
162  * by the host, but the host will need to be able to read them.
163  */
164 static int target_to_host_prot(int prot)
165 {
166     return (prot & (PROT_READ | PROT_WRITE)) |
167            (prot & PROT_EXEC ? PROT_READ : 0);
168 }
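/*
 * Worked example (editorial): guest code is executed via TCG translation
 * rather than natively, so guest PROT_EXEC only requires that the host
 * can read the page.  Assuming the usual PROT_* values:
 *
 *     target_to_host_prot(PROT_EXEC)              == PROT_READ
 *     target_to_host_prot(PROT_READ | PROT_EXEC)  == PROT_READ
 *     target_to_host_prot(PROT_READ | PROT_WRITE | PROT_EXEC)
 *                                   == PROT_READ | PROT_WRITE
 */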
169 
170 /* NOTE: all the constants are the HOST ones, but addresses are target. */
171 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
172 {
173     int host_page_size = qemu_real_host_page_size();
174     abi_ulong starts[3];
175     abi_ulong lens[3];
176     int prots[3];
177     abi_ulong host_start, host_last, last;
178     int prot1, ret, page_flags, nranges;
179 
180     trace_target_mprotect(start, len, target_prot);
181 
182     if ((start & ~TARGET_PAGE_MASK) != 0) {
183         return -TARGET_EINVAL;
184     }
185     page_flags = validate_prot_to_pageflags(target_prot);
186     if (!page_flags) {
187         return -TARGET_EINVAL;
188     }
189     if (len == 0) {
190         return 0;
191     }
192     len = TARGET_PAGE_ALIGN(len);
193     if (!guest_range_valid_untagged(start, len)) {
194         return -TARGET_ENOMEM;
195     }
196 
197     last = start + len - 1;
198     host_start = start & -host_page_size;
199     host_last = ROUND_UP(last, host_page_size) - 1;
200     nranges = 0;
201 
202     mmap_lock();
203 
204     if (host_last - host_start < host_page_size) {
205         /* Single host page contains all guest pages: sum the prot. */
206         prot1 = target_prot;
207         for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
208             prot1 |= page_get_flags(a);
209         }
210         for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
211             prot1 |= page_get_flags(a + 1);
212         }
213         starts[nranges] = host_start;
214         lens[nranges] = host_page_size;
215         prots[nranges] = prot1;
216         nranges++;
217     } else {
218         if (host_start < start) {
219             /* Host page contains more than one guest page: sum the prot. */
220             prot1 = target_prot;
221             for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
222                 prot1 |= page_get_flags(a);
223             }
224             /* If the resulting sum differs, create a new range. */
225             if (prot1 != target_prot) {
226                 starts[nranges] = host_start;
227                 lens[nranges] = host_page_size;
228                 prots[nranges] = prot1;
229                 nranges++;
230                 host_start += host_page_size;
231             }
232         }
233 
234         if (last < host_last) {
235             /* Host page contains more than one guest page: sum the prot. */
236             prot1 = target_prot;
237             for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
238                 prot1 |= page_get_flags(a + 1);
239             }
240             /* If the resulting sum differs, create a new range. */
241             if (prot1 != target_prot) {
242                 host_last -= host_page_size;
243                 starts[nranges] = host_last + 1;
244                 lens[nranges] = host_page_size;
245                 prots[nranges] = prot1;
246                 nranges++;
247             }
248         }
249 
250         /* Create a range for the middle, if any remains. */
251         if (host_start < host_last) {
252             starts[nranges] = host_start;
253             lens[nranges] = host_last - host_start + 1;
254             prots[nranges] = target_prot;
255             nranges++;
256         }
257     }
258 
259     for (int i = 0; i < nranges; ++i) {
260         ret = mprotect(g2h_untagged(starts[i]), lens[i],
261                        target_to_host_prot(prots[i]));
262         if (ret != 0) {
263             goto error;
264         }
265     }
266 
267     page_set_flags(start, last, page_flags);
268     ret = 0;
269 
270  error:
271     mmap_unlock();
272     return ret;
273 }
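/*
 * Illustrative scenario (editorial, page sizes chosen for illustration):
 * with 4K target pages on a 64K-page host, changing the protection of a
 * single guest page cannot be done on the host in isolation.  The code
 * above therefore ORs the requested protection with the page_get_flags()
 * of the other guest pages sharing the boundary host page(s), issues at
 * most three host mprotect() calls (head page, middle run, tail page),
 * and records the exact guest-visible protection with page_set_flags().
 */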
274 
275 /*
276  * Perform munmap on behalf of the target, with host parameters.
277  * If reserved_va, we must replace the memory reservation.
278  */
279 static int do_munmap(void *addr, size_t len)
280 {
281     if (reserved_va) {
282         void *ptr = mmap(addr, len, PROT_NONE,
283                          MAP_FIXED | MAP_ANONYMOUS
284                          | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
285         return ptr == addr ? 0 : -1;
286     }
287     return munmap(addr, len);
288 }
289 
290 /*
291  * Perform a pread on behalf of target_mmap.  We can reach EOF, we can be
292  * interrupted by signals, and in general there's no good error return path.
293  * If @zero, zero the rest of the block at EOF.
294  * Return true on success.
295  */
296 static bool mmap_pread(int fd, void *p, size_t len, off_t offset, bool zero)
297 {
298     while (1) {
299         ssize_t r = pread(fd, p, len, offset);
300 
301         if (likely(r == len)) {
302             /* Complete */
303             return true;
304         }
305         if (r == 0) {
306             /* EOF */
307             if (zero) {
308                 memset(p, 0, len);
309             }
310             return true;
311         }
312         if (r > 0) {
313             /* Short read */
314             p += r;
315             len -= r;
316             offset += r;
317         } else if (errno != EINTR) {
318             /* Error */
319             return false;
320         }
321     }
322 }
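/*
 * Usage note (editorial): this helper is used where plain pread(2)
 * semantics are not enough.  mmap_frag() passes zero=true so a read that
 * hits EOF still leaves the rest of the fragment zeroed, while
 * mmap_h_gt_g() passes zero=false when filling a misaligned file mapping.
 * Short reads and EINTR are retried transparently.
 */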
323 
324 /*
325  * Map an incomplete host page.
326  *
327  * Here be dragons.  This case will not work if there is an existing
328  * overlapping host page, which is file mapped, and for which the mapping
329  * is beyond the end of the file.  In that case, we will see SIGBUS when
330  * trying to write a portion of this page.
331  *
332  * FIXME: Work around this with a temporary signal handler and longjmp.
333  */
334 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
335                       int prot, int flags, int fd, off_t offset)
336 {
337     int host_page_size = qemu_real_host_page_size();
338     abi_ulong real_last;
339     void *host_start;
340     int prot_old, prot_new;
341     int host_prot_old, host_prot_new;
342 
343     if (!(flags & MAP_ANONYMOUS)
344         && (flags & MAP_TYPE) == MAP_SHARED
345         && (prot & PROT_WRITE)) {
346         /*
347          * msync() won't work with the partial page, so we return an
348          * error if write is possible while it is a shared mapping.
349          */
350         errno = EINVAL;
351         return false;
352     }
353 
354     real_last = real_start + host_page_size - 1;
355     host_start = g2h_untagged(real_start);
356 
357     /* Get the protection of the target pages outside the mapping. */
358     prot_old = 0;
359     for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
360         prot_old |= page_get_flags(a);
361     }
362     for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
363         prot_old |= page_get_flags(a);
364     }
365 
366     if (prot_old == 0) {
367         /*
368          * Since !(prot_old & PAGE_VALID), there were no guest pages
369          * outside of the fragment we need to map.  Allocate a new host
370          * page to cover, discarding whatever else may have been present.
371          */
372         void *p = mmap(host_start, host_page_size,
373                        target_to_host_prot(prot),
374                        flags | MAP_ANONYMOUS, -1, 0);
375         if (p != host_start) {
376             if (p != MAP_FAILED) {
377                 do_munmap(p, host_page_size);
378                 errno = EEXIST;
379             }
380             return false;
381         }
382         prot_old = prot;
383     }
384     prot_new = prot | prot_old;
385 
386     host_prot_old = target_to_host_prot(prot_old);
387     host_prot_new = target_to_host_prot(prot_new);
388 
389     /* Adjust protection to be able to write. */
390     if (!(host_prot_old & PROT_WRITE)) {
391         host_prot_old |= PROT_WRITE;
392         mprotect(host_start, host_page_size, host_prot_old);
393     }
394 
395     /* Read or zero the new guest pages. */
396     if (flags & MAP_ANONYMOUS) {
397         memset(g2h_untagged(start), 0, last - start + 1);
398     } else if (!mmap_pread(fd, g2h_untagged(start), last - start + 1,
399                            offset, true)) {
400         return false;
401     }
402 
403     /* Put final protection */
404     if (host_prot_new != host_prot_old) {
405         mprotect(host_start, host_page_size, host_prot_new);
406     }
407     return true;
408 }
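/*
 * Worked example (editorial, page sizes chosen for illustration): with a
 * 16K-page host and a 4K-page guest, a guest mapping of [0x3000, 0x3fff]
 * covers only a fragment of the host page [0x0000, 0x3fff].  If no other
 * guest page within that host page is valid, the whole host page is
 * (re)mapped anonymously; otherwise the existing host page is made
 * writable, the fragment is read (or zeroed) in place, and the combined
 * old/new protection is applied afterwards.
 */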
409 
410 abi_ulong task_unmapped_base;
411 abi_ulong elf_et_dyn_base;
412 abi_ulong mmap_next_start;
413 
414 /*
415  * Subroutine of mmap_find_vma, used when we have pre-allocated
416  * a chunk of guest address space.
417  */
418 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
419                                         abi_ulong align)
420 {
421     target_ulong ret;
422 
423     ret = page_find_range_empty(start, reserved_va, size, align);
424     if (ret == -1 && start > mmap_min_addr) {
425         /* Restart at the beginning of the address space. */
426         ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
427     }
428 
429     return ret;
430 }
431 
432 /*
433  * Find and reserve a free memory area of size 'size'. The search
434  * starts at 'start'.
435  * It must be called with mmap_lock() held.
436  * Return -1 if error.
437  */
438 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
439 {
440     int host_page_size = qemu_real_host_page_size();
441     void *ptr, *prev;
442     abi_ulong addr;
443     int wrapped, repeat;
444 
445     align = MAX(align, host_page_size);
446 
447     /* If 'start' == 0, then a default start address is used. */
448     if (start == 0) {
449         start = mmap_next_start;
450     } else {
451         start &= -host_page_size;
452     }
453     start = ROUND_UP(start, align);
454     size = ROUND_UP(size, host_page_size);
455 
456     if (reserved_va) {
457         return mmap_find_vma_reserved(start, size, align);
458     }
459 
460     addr = start;
461     wrapped = repeat = 0;
462     prev = 0;
463 
464     for (;; prev = ptr) {
465         /*
466          * Reserve needed memory area to avoid a race.
467          * It should be discarded using:
468          *  - mmap() with MAP_FIXED flag
469          *  - mremap() with MREMAP_FIXED flag
470          *  - shmat() with SHM_REMAP flag
471          */
472         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
473                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
474 
475         /* ENOMEM, if host address space has no memory */
476         if (ptr == MAP_FAILED) {
477             return (abi_ulong)-1;
478         }
479 
480         /*
481          * Count the number of sequential returns of the same address.
482          * This is used to modify the search algorithm below.
483          */
484         repeat = (ptr == prev ? repeat + 1 : 0);
485 
486         if (h2g_valid(ptr + size - 1)) {
487             addr = h2g(ptr);
488 
489             if ((addr & (align - 1)) == 0) {
490                 /* Success.  */
491                 if (start == mmap_next_start && addr >= task_unmapped_base) {
492                     mmap_next_start = addr + size;
493                 }
494                 return addr;
495             }
496 
497             /* The address is not properly aligned for the target.  */
498             switch (repeat) {
499             case 0:
500                 /*
501                  * Assume the result that the kernel gave us is the
502                  * first with enough free space, so start again at the
503                  * next higher target page.
504                  */
505                 addr = ROUND_UP(addr, align);
506                 break;
507             case 1:
508                 /*
509                  * Sometimes the kernel decides to perform the allocation
510                  * at the top end of memory instead.
511                  */
512                 addr &= -align;
513                 break;
514             case 2:
515                 /* Start over at low memory.  */
516                 addr = 0;
517                 break;
518             default:
519                 /* Fail.  This unaligned block must be the last.  */
520                 addr = -1;
521                 break;
522             }
523         } else {
524             /*
525              * Since the result the kernel gave didn't fit, start
526              * again at low memory.  If any repetition, fail.
527              */
528             addr = (repeat ? -1 : 0);
529         }
530 
531         /* Unmap and try again.  */
532         munmap(ptr, size);
533 
534         /* ENOMEM if we checked the whole of the target address space.  */
535         if (addr == (abi_ulong)-1) {
536             return (abi_ulong)-1;
537         } else if (addr == 0) {
538             if (wrapped) {
539                 return (abi_ulong)-1;
540             }
541             wrapped = 1;
542             /*
543              * Don't actually use 0 when wrapping, instead indicate
544              * that we'd truly like an allocation in low memory.
545              */
546             addr = (mmap_min_addr > TARGET_PAGE_SIZE
547                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
548                      : TARGET_PAGE_SIZE);
549         } else if (wrapped && addr >= start) {
550             return (abi_ulong)-1;
551         }
552     }
553 }
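/*
 * Editorial note: without reserved_va, the loop above probes the host with
 * PROT_NONE mappings.  A suitable candidate is left in place as a
 * reservation for the caller to replace (mmap with MAP_FIXED, mremap with
 * MREMAP_FIXED, or shmat with SHM_REMAP); an unsuitable one is unmapped
 * and the search continues, with the 'repeat' counter selecting between
 * aligning up, aligning down, restarting at low memory, and giving up.
 */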
554 
555 /*
556  * Record a successful mmap within the user-exec interval tree.
557  */
558 static abi_long mmap_end(abi_ulong start, abi_ulong last,
559                          abi_ulong passthrough_start,
560                          abi_ulong passthrough_last,
561                          int flags, int page_flags)
562 {
563     if (flags & MAP_ANONYMOUS) {
564         page_flags |= PAGE_ANON;
565     }
566     page_flags |= PAGE_RESET;
567     if (passthrough_start > passthrough_last) {
568         page_set_flags(start, last, page_flags);
569     } else {
570         if (start < passthrough_start) {
571             page_set_flags(start, passthrough_start - 1, page_flags);
572         }
573         page_set_flags(passthrough_start, passthrough_last,
574                        page_flags | PAGE_PASSTHROUGH);
575         if (passthrough_last < last) {
576             page_set_flags(passthrough_last + 1, last, page_flags);
577         }
578     }
579     shm_region_rm_complete(start, last);
580     trace_target_mmap_complete(start);
581     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
582         FILE *f = qemu_log_trylock();
583         if (f) {
584             fprintf(f, "page layout changed following mmap\n");
585             page_dump(f);
586             qemu_log_unlock(f);
587         }
588     }
589     return start;
590 }
591 
592 /*
593  * Special case host page size == target page size,
594  * where there are no edge conditions.
595  */
596 static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
597                             int host_prot, int flags, int page_flags,
598                             int fd, off_t offset)
599 {
600     void *p, *want_p = NULL;
601     abi_ulong last;
602 
603     if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
604         want_p = g2h_untagged(start);
605     }
606 
607     p = mmap(want_p, len, host_prot, flags, fd, offset);
608     if (p == MAP_FAILED) {
609         return -1;
610     }
611     /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */
612     if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) {
613         do_munmap(p, len);
614         errno = EEXIST;
615         return -1;
616     }
617 
618     start = h2g(p);
619     last = start + len - 1;
620     return mmap_end(start, last, start, last, flags, page_flags);
621 }
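/*
 * Editorial note: the p != want_p test above is what emulates
 * MAP_FIXED_NOREPLACE on host kernels that predate the flag (added in
 * Linux 4.17) and therefore silently ignore it.  Such a kernel returns
 * some other address instead of failing, so the stray mapping is undone
 * and EEXIST is reported, matching the documented semantics.
 */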
622 
623 /*
624  * Special case host page size < target page size.
625  *
626  * The two special cases are increased guest alignment, and mapping
627  * past the end of a file.
628  *
629  * When mapping files into a memory area larger than the file,
630  * accesses to pages beyond the file size will cause a SIGBUS.
631  *
632  * For example, if mmaping a file of 100 bytes on a host with 4K
633  * pages emulating a target with 8K pages, the target expects to
634  * be able to access the first 8K. But the host will trap us on
635  * any access beyond 4K.
636  *
637  * When emulating a target with a larger page size than the host's,
638  * we may need to truncate file maps at EOF and add extra anonymous
639  * pages up to the target's page boundary.
640  *
641  * This workaround only works for files that do not change.
642  * If the file is later extended (e.g. ftruncate), the SIGBUS
643  * vanishes and the proper behaviour is that changes within the
644  * anon page should be reflected in the file.
645  *
646  * However, this case is rather common with executable images,
647  * so the workaround is important for even trivial tests, whereas
648  * the mmap of a file being extended is less common.
649  */
650 static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
651                             int mmap_flags, int page_flags, int fd,
652                             off_t offset, int host_page_size)
653 {
654     void *p, *want_p = NULL;
655     off_t fileend_adj = 0;
656     int flags = mmap_flags;
657     abi_ulong last, pass_last;
658 
659     if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
660         want_p = g2h_untagged(start);
661     }
662 
663     if (!(flags & MAP_ANONYMOUS)) {
664         struct stat sb;
665 
666         if (fstat(fd, &sb) == -1) {
667             return -1;
668         }
669         if (offset >= sb.st_size) {
670             /*
671              * The entire map is beyond the end of the file.
672              * Transform it to an anonymous mapping.
673              */
674             flags |= MAP_ANONYMOUS;
675             fd = -1;
676             offset = 0;
677         } else if (offset + len > sb.st_size) {
678             /*
679              * A portion of the map is beyond the end of the file.
680              * Truncate the file portion of the allocation.
681              */
682             fileend_adj = offset + len - sb.st_size;
683         }
684     }
685 
686     if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
687         if (fileend_adj) {
688             p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
689         } else {
690             p = mmap(want_p, len, host_prot, flags, fd, offset);
691         }
692         if (p != want_p) {
693             if (p != MAP_FAILED) {
694                 /* Host does not support MAP_FIXED_NOREPLACE: emulate. */
695                 do_munmap(p, len);
696                 errno = EEXIST;
697             }
698             return -1;
699         }
700 
701         if (fileend_adj) {
702             void *t = mmap(p, len - fileend_adj, host_prot,
703                            (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
704                            fd, offset);
705 
706             if (t == MAP_FAILED) {
707                 int save_errno = errno;
708 
709                 /*
710                  * We failed a map over the top of the successful anonymous
711                  * mapping above. The only failure mode is running out of VMAs,
712                  * and there's nothing that we can do to detect that earlier.
713                  * If we have replaced an existing mapping with MAP_FIXED,
714                  * then we cannot properly recover.  It's a coin toss whether
715                  * it would be better to exit or continue here.
716                  */
717                 if (!(flags & MAP_FIXED_NOREPLACE) &&
718                     !page_check_range_empty(start, start + len - 1)) {
719                     qemu_log("QEMU target_mmap late failure: %s",
720                              strerror(save_errno));
721                 }
722 
723                 do_munmap(want_p, len);
724                 errno = save_errno;
725                 return -1;
726             }
727         }
728     } else {
729         size_t host_len, part_len;
730 
731         /*
732          * Take care to align the host memory.  Perform a larger anonymous
733          * allocation and extract the aligned portion.  Remap the file on
734          * top of that.
735          */
736         host_len = len + TARGET_PAGE_SIZE - host_page_size;
737         p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
738         if (p == MAP_FAILED) {
739             return -1;
740         }
741 
742         part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
743         if (part_len) {
744             part_len = TARGET_PAGE_SIZE - part_len;
745             do_munmap(p, part_len);
746             p += part_len;
747             host_len -= part_len;
748         }
749         if (len < host_len) {
750             do_munmap(p + len, host_len - len);
751         }
752 
753         if (!(flags & MAP_ANONYMOUS)) {
754             void *t = mmap(p, len - fileend_adj, host_prot,
755                            flags | MAP_FIXED, fd, offset);
756 
757             if (t == MAP_FAILED) {
758                 int save_errno = errno;
759                 do_munmap(p, len);
760                 errno = save_errno;
761                 return -1;
762             }
763         }
764 
765         start = h2g(p);
766     }
767 
768     last = start + len - 1;
769     if (fileend_adj) {
770         pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
771     } else {
772         pass_last = last;
773     }
774     return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
775 }
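/*
 * Worked example (editorial): take the 100-byte file from the comment
 * above, a 4K-page host, an 8K-page guest, and a MAP_FIXED mapping of the
 * file at offset 0 with len already page-aligned to 8K.  Since
 * offset + len exceeds st_size, fileend_adj = 8192 - 100 = 8092: the range
 * is first mapped anonymously, then the leading len - fileend_adj bytes
 * are remapped from the file.  mmap_end() marks only the host page(s)
 * actually backed by the file as PAGE_PASSTHROUGH; the trailing anonymous
 * portion is not.
 */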
776 
777 /*
778  * Special case host page size > target page size.
779  *
780  * The two special cases are address and file offsets that are valid
781  * for the guest that cannot be directly represented by the host.
782  */
783 static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len,
784                             int target_prot, int host_prot,
785                             int flags, int page_flags, int fd,
786                             off_t offset, int host_page_size)
787 {
788     void *p, *want_p = NULL;
789     off_t host_offset = offset & -host_page_size;
790     abi_ulong last, real_start, real_last;
791     bool misaligned_offset = false;
792     size_t host_len;
793 
794     if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
795         want_p = g2h_untagged(start);
796     }
797 
798     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
799         /*
800          * Adjust the offset to something representable on the host.
801          */
802         host_len = len + offset - host_offset;
803         p = mmap(want_p, host_len, host_prot, flags, fd, host_offset);
804         if (p == MAP_FAILED) {
805             return -1;
806         }
807 
808         /* Update start to the file position at offset. */
809         p += offset - host_offset;
810 
811         start = h2g(p);
812         last = start + len - 1;
813         return mmap_end(start, last, start, last, flags, page_flags);
814     }
815 
816     if (!(flags & MAP_ANONYMOUS)) {
817         misaligned_offset = (start ^ offset) & (host_page_size - 1);
818 
819         /*
820          * The fallback for misalignment is a private mapping + read.
821          * This carries none of the semantics required of MAP_SHARED.
822          */
823         if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) {
824             errno = EINVAL;
825             return -1;
826         }
827     }
828 
829     last = start + len - 1;
830     real_start = start & -host_page_size;
831     real_last = ROUND_UP(last, host_page_size) - 1;
832 
833     /*
834      * Handle the start and end of the mapping.
835      */
836     if (real_start < start) {
837         abi_ulong real_page_last = real_start + host_page_size - 1;
838         if (last <= real_page_last) {
839             /* Entire allocation a subset of one host page. */
840             if (!mmap_frag(real_start, start, last, target_prot,
841                            flags, fd, offset)) {
842                 return -1;
843             }
844             return mmap_end(start, last, -1, 0, flags, page_flags);
845         }
846 
847         if (!mmap_frag(real_start, start, real_page_last, target_prot,
848                        flags, fd, offset)) {
849             return -1;
850         }
851         real_start = real_page_last + 1;
852     }
853 
854     if (last < real_last) {
855         abi_ulong real_page_start = real_last - host_page_size + 1;
856         if (!mmap_frag(real_page_start, real_page_start, last,
857                        target_prot, flags, fd,
858                        offset + real_page_start - start)) {
859             return -1;
860         }
861         real_last = real_page_start - 1;
862     }
863 
864     if (real_start > real_last) {
865         return mmap_end(start, last, -1, 0, flags, page_flags);
866     }
867 
868     /*
869      * Handle the middle of the mapping.
870      */
871 
872     host_len = real_last - real_start + 1;
873     want_p += real_start - start;
874 
875     if (flags & MAP_ANONYMOUS) {
876         p = mmap(want_p, host_len, host_prot, flags, -1, 0);
877     } else if (!misaligned_offset) {
878         p = mmap(want_p, host_len, host_prot, flags, fd,
879                  offset + real_start - start);
880     } else {
881         p = mmap(want_p, host_len, host_prot | PROT_WRITE,
882                  flags | MAP_ANONYMOUS, -1, 0);
883     }
884     if (p != want_p) {
885         if (p != MAP_FAILED) {
886             do_munmap(p, host_len);
887             errno = EEXIST;
888         }
889         return -1;
890     }
891 
892     if (misaligned_offset) {
893         if (!mmap_pread(fd, p, host_len, offset + real_start - start, false)) {
894             do_munmap(p, host_len);
895             return -1;
896         }
897         if (!(host_prot & PROT_WRITE)) {
898             mprotect(p, host_len, host_prot);
899         }
900     }
901 
902     return mmap_end(start, last, -1, 0, flags, page_flags);
903 }
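/*
 * Editorial summary: for fixed-address requests, the function above splits
 * the work into up to three pieces -- a leading host-page fragment and a
 * trailing host-page fragment (both via mmap_frag()), plus a host-aligned
 * middle mapped directly.  When the file offset and the guest address are
 * not congruent modulo the host page size, no direct file mapping is
 * possible: the middle is mapped anonymously and filled with mmap_pread(),
 * which is why such requests are rejected for anything other than
 * MAP_PRIVATE.
 */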
904 
905 static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
906                                     int target_prot, int flags, int page_flags,
907                                     int fd, off_t offset)
908 {
909     int host_page_size = qemu_real_host_page_size();
910     int host_prot;
911 
912     /*
913      * For reserved_va, we are in full control of the allocation.
914      * Find a suitable hole and convert to MAP_FIXED.
915      */
916     if (reserved_va) {
917         if (flags & MAP_FIXED_NOREPLACE) {
918             /* Validate that the chosen range is empty. */
919             if (!page_check_range_empty(start, start + len - 1)) {
920                 errno = EEXIST;
921                 return -1;
922             }
923             flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
924         } else if (!(flags & MAP_FIXED)) {
925             abi_ulong real_start = start & -host_page_size;
926             off_t host_offset = offset & -host_page_size;
927             size_t real_len = len + offset - host_offset;
928             abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE);
929 
930             start = mmap_find_vma(real_start, real_len, align);
931             if (start == (abi_ulong)-1) {
932                 errno = ENOMEM;
933                 return -1;
934             }
935             start += offset - host_offset;
936             flags |= MAP_FIXED;
937         }
938     }
939 
940     host_prot = target_to_host_prot(target_prot);
941 
942     if (host_page_size == TARGET_PAGE_SIZE) {
943         return mmap_h_eq_g(start, len, host_prot, flags,
944                            page_flags, fd, offset);
945     } else if (host_page_size < TARGET_PAGE_SIZE) {
946         return mmap_h_lt_g(start, len, host_prot, flags,
947                            page_flags, fd, offset, host_page_size);
948     } else {
949         return mmap_h_gt_g(start, len, target_prot, host_prot, flags,
950                            page_flags, fd, offset, host_page_size);
951     }
952 }
953 
954 /* NOTE: all the constants are the HOST ones */
955 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
956                      int flags, int fd, off_t offset)
957 {
958     abi_long ret;
959     int page_flags;
960 
961     trace_target_mmap(start, len, target_prot, flags, fd, offset);
962 
963     if (!len) {
964         errno = EINVAL;
965         return -1;
966     }
967 
968     page_flags = validate_prot_to_pageflags(target_prot);
969     if (!page_flags) {
970         errno = EINVAL;
971         return -1;
972     }
973 
974     /* Also check for overflows... */
975     len = TARGET_PAGE_ALIGN(len);
976     if (!len || len != (size_t)len) {
977         errno = ENOMEM;
978         return -1;
979     }
980 
981     if (offset & ~TARGET_PAGE_MASK) {
982         errno = EINVAL;
983         return -1;
984     }
985     if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
986         if (start & ~TARGET_PAGE_MASK) {
987             errno = EINVAL;
988             return -1;
989         }
990         if (!guest_range_valid_untagged(start, len)) {
991             errno = ENOMEM;
992             return -1;
993         }
994     }
995 
996     mmap_lock();
997 
998     ret = target_mmap__locked(start, len, target_prot, flags,
999                               page_flags, fd, offset);
1000 
1001     mmap_unlock();
1002 
1003     /*
1004      * If we're mapping shared memory, ensure we generate code for parallel
1005      * execution and flush old translations.  This will work up to the level
1006      * supported by the host -- anything that requires EXCP_ATOMIC will not
1007      * be atomic with respect to an external process.
1008      */
1009     if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) {
1010         CPUState *cpu = thread_cpu;
1011         if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
1012             tcg_cflags_set(cpu, CF_PARALLEL);
1013             tb_flush(cpu);
1014         }
1015     }
1016 
1017     return ret;
1018 }
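/*
 * Illustrative call (editorial; per the NOTE above, the constants are host
 * values):
 *
 *     abi_long guest_va = target_mmap(0, 16384, PROT_READ | PROT_WRITE,
 *                                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * returns a guest virtual address, or -1 with errno set, after the page
 * flags for the range have been recorded via mmap_end().
 */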
1019 
1020 static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
1021 {
1022     int host_page_size = qemu_real_host_page_size();
1023     abi_ulong real_start;
1024     abi_ulong real_last;
1025     abi_ulong real_len;
1026     abi_ulong last;
1027     abi_ulong a;
1028     void *host_start;
1029     int prot;
1030 
1031     last = start + len - 1;
1032     real_start = start & -host_page_size;
1033     real_last = ROUND_UP(last, host_page_size) - 1;
1034 
1035     /*
1036      * If guest pages remain on the first or last host pages,
1037      * adjust the deallocation to retain those guest pages.
1038      * The single page special case is required for the last page,
1039      * lest real_start overflow to zero.
1040      */
1041     if (real_last - real_start < host_page_size) {
1042         prot = 0;
1043         for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
1044             prot |= page_get_flags(a);
1045         }
1046         for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
1047             prot |= page_get_flags(a + 1);
1048         }
1049         if (prot != 0) {
1050             return 0;
1051         }
1052     } else {
1053         for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
1054             prot |= page_get_flags(a);
1055         }
1056         if (prot != 0) {
1057             real_start += host_page_size;
1058         }
1059 
1060         for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
1061             prot |= page_get_flags(a + 1);
1062         }
1063         if (prot != 0) {
1064             real_last -= host_page_size;
1065         }
1066 
1067         if (real_last < real_start) {
1068             return 0;
1069         }
1070     }
1071 
1072     real_len = real_last - real_start + 1;
1073     host_start = g2h_untagged(real_start);
1074 
1075     return do_munmap(host_start, real_len);
1076 }
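/*
 * Editorial note: when host pages are larger than target pages, the host
 * pages at either end of [start, last] may still contain live guest pages
 * outside the range.  The checks above shrink, or entirely skip, the host
 * munmap/reservation so those neighbouring guest mappings survive;
 * clearing the guest-visible flags for [start, last] is left to the
 * callers.
 */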
1077 
1078 int target_munmap(abi_ulong start, abi_ulong len)
1079 {
1080     int ret;
1081 
1082     trace_target_munmap(start, len);
1083 
1084     if (start & ~TARGET_PAGE_MASK) {
1085         errno = EINVAL;
1086         return -1;
1087     }
1088     len = TARGET_PAGE_ALIGN(len);
1089     if (len == 0 || !guest_range_valid_untagged(start, len)) {
1090         errno = EINVAL;
1091         return -1;
1092     }
1093 
1094     mmap_lock();
1095     ret = mmap_reserve_or_unmap(start, len);
1096     if (likely(ret == 0)) {
1097         page_set_flags(start, start + len - 1, 0);
1098         shm_region_rm_complete(start, start + len - 1);
1099     }
1100     mmap_unlock();
1101 
1102     return ret;
1103 }
1104 
1105 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
1106                        abi_ulong new_size, unsigned long flags,
1107                        abi_ulong new_addr)
1108 {
1109     int prot;
1110     void *host_addr;
1111 
1112     if (!guest_range_valid_untagged(old_addr, old_size) ||
1113         ((flags & MREMAP_FIXED) &&
1114          !guest_range_valid_untagged(new_addr, new_size)) ||
1115         ((flags & MREMAP_MAYMOVE) == 0 &&
1116          !guest_range_valid_untagged(old_addr, new_size))) {
1117         errno = ENOMEM;
1118         return -1;
1119     }
1120 
1121     mmap_lock();
1122 
1123     if (flags & MREMAP_FIXED) {
1124         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
1125                            flags, g2h_untagged(new_addr));
1126 
1127         if (reserved_va && host_addr != MAP_FAILED) {
1128             /*
1129              * If new and old addresses overlap then the above mremap will
1130              * already have failed with EINVAL.
1131              */
1132             mmap_reserve_or_unmap(old_addr, old_size);
1133         }
1134     } else if (flags & MREMAP_MAYMOVE) {
1135         abi_ulong mmap_start;
1136 
1137         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
1138 
1139         if (mmap_start == -1) {
1140             errno = ENOMEM;
1141             host_addr = MAP_FAILED;
1142         } else {
1143             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
1144                                flags | MREMAP_FIXED,
1145                                g2h_untagged(mmap_start));
1146             if (reserved_va) {
1147                 mmap_reserve_or_unmap(old_addr, old_size);
1148             }
1149         }
1150     } else {
1151         int page_flags = 0;
1152         if (reserved_va && old_size < new_size) {
1153             abi_ulong addr;
1154             for (addr = old_addr + old_size;
1155                  addr < old_addr + new_size;
1156                  addr++) {
1157                 page_flags |= page_get_flags(addr);
1158             }
1159         }
1160         if (page_flags == 0) {
1161             host_addr = mremap(g2h_untagged(old_addr),
1162                                old_size, new_size, flags);
1163 
1164             if (host_addr != MAP_FAILED) {
1165                 /* Check if address fits target address space */
1166                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
1167                     /* Revert mremap() changes */
1168                     host_addr = mremap(g2h_untagged(old_addr),
1169                                        new_size, old_size, flags);
1170                     errno = ENOMEM;
1171                     host_addr = MAP_FAILED;
1172                 } else if (reserved_va && old_size > new_size) {
1173                     mmap_reserve_or_unmap(old_addr + old_size,
1174                                           old_size - new_size);
1175                 }
1176             }
1177         } else {
1178             errno = ENOMEM;
1179             host_addr = MAP_FAILED;
1180         }
1181     }
1182 
1183     if (host_addr == MAP_FAILED) {
1184         new_addr = -1;
1185     } else {
1186         new_addr = h2g(host_addr);
1187         prot = page_get_flags(old_addr);
1188         page_set_flags(old_addr, old_addr + old_size - 1, 0);
1189         shm_region_rm_complete(old_addr, old_addr + old_size - 1);
1190         page_set_flags(new_addr, new_addr + new_size - 1,
1191                        prot | PAGE_VALID | PAGE_RESET);
1192         shm_region_rm_complete(new_addr, new_addr + new_size - 1);
1193     }
1194     mmap_unlock();
1195     return new_addr;
1196 }
1197 
1198 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
1199 {
1200     abi_ulong len;
1201     int ret = 0;
1202 
1203     if (start & ~TARGET_PAGE_MASK) {
1204         return -TARGET_EINVAL;
1205     }
1206     if (len_in == 0) {
1207         return 0;
1208     }
1209     len = TARGET_PAGE_ALIGN(len_in);
1210     if (len == 0 || !guest_range_valid_untagged(start, len)) {
1211         return -TARGET_EINVAL;
1212     }
1213 
1214     /* Translate for some architectures which have different MADV_xxx values */
1215     switch (advice) {
1216     case TARGET_MADV_DONTNEED:      /* alpha */
1217         advice = MADV_DONTNEED;
1218         break;
1219     case TARGET_MADV_WIPEONFORK:    /* parisc */
1220         advice = MADV_WIPEONFORK;
1221         break;
1222     case TARGET_MADV_KEEPONFORK:    /* parisc */
1223         advice = MADV_KEEPONFORK;
1224         break;
1225     /* we do not care about the other MADV_xxx values yet */
1226     }
1227 
1228     /*
1229      * Most advice values are hints, so ignoring and returning success is ok.
1230      *
1231      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
1232      * MADV_KEEPONFORK are not hints and need to be emulated.
1233      *
1234      * A straight passthrough for those may not be safe because qemu sometimes
1235      * turns private file-backed mappings into anonymous mappings.
1236      * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
1237      * same semantics for the host as for the guest.
1238      *
1239      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
1240      * return failure if not.
1241      *
1242      * MADV_DONTNEED is passed through as well, if possible.
1243      * If passthrough isn't possible, we nevertheless (wrongly!) return
1244      * success, which is broken but some userspace programs fail to work
1245      * otherwise. Completely implementing such emulation is quite complicated
1246      * though.
1247      */
1248     mmap_lock();
1249     switch (advice) {
1250     case MADV_WIPEONFORK:
1251     case MADV_KEEPONFORK:
1252         ret = -EINVAL;
1253         /* fall through */
1254     case MADV_DONTNEED:
1255         if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
1256             ret = get_errno(madvise(g2h_untagged(start), len, advice));
1257             if ((advice == MADV_DONTNEED) && (ret == 0)) {
1258                 page_reset_target_data(start, start + len - 1);
1259             }
1260         }
1261     }
1262     mmap_unlock();
1263 
1264     return ret;
1265 }
1266 
1267 #ifndef TARGET_FORCE_SHMLBA
1268 /*
1269  * For most architectures, SHMLBA is the same as the page size;
1270  * some architectures have larger values, in which case they should
1271  * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
1272  * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
1273  * and defining its own value for SHMLBA.
1274  *
1275  * The kernel also permits SHMLBA to be set by the architecture to a
1276  * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
1277  * this means that addresses are rounded to the large size if
1278  * SHM_RND is set but addresses not aligned to that size are not rejected
1279  * as long as they are at least page-aligned. Since the only architecture
1280  * which uses this is ia64, this code doesn't provide for that oddity.
1281  */
1282 static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
1283 {
1284     return TARGET_PAGE_SIZE;
1285 }
1286 #endif
1287 
1288 #if defined(__arm__) || defined(__mips__) || defined(__sparc__)
1289 #define HOST_FORCE_SHMLBA 1
1290 #else
1291 #define HOST_FORCE_SHMLBA 0
1292 #endif
1293 
1294 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
1295                        abi_ulong shmaddr, int shmflg)
1296 {
1297     CPUState *cpu = env_cpu(cpu_env);
1298     struct shmid_ds shm_info;
1299     int ret;
1300     int h_pagesize;
1301     int t_shmlba, h_shmlba, m_shmlba;
1302     size_t t_len, h_len, m_len;
1303 
1304     /* shmat pointers are always untagged */
1305 
1306     /*
1307      * Because we can't use host shmat() unless the address is sufficiently
1308      * aligned for the host, we'll need to check both.
1309      * TODO: Could be fixed with softmmu.
1310      */
1311     t_shmlba = target_shmlba(cpu_env);
1312     h_pagesize = qemu_real_host_page_size();
1313     h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize);
1314     m_shmlba = MAX(t_shmlba, h_shmlba);
1315 
1316     if (shmaddr) {
1317         if (shmaddr & (m_shmlba - 1)) {
1318             if (shmflg & SHM_RND) {
1319                 /*
1320                  * The guest is allowing the kernel to round the address.
1321                  * Assume that the guest is ok with us rounding to the
1322                  * host required alignment too.  Anyway if we don't, we'll
1323                  * get an error from the kernel.
1324                  */
1325                 shmaddr &= ~(m_shmlba - 1);
1326                 if (shmaddr == 0 && (shmflg & SHM_REMAP)) {
1327                     return -TARGET_EINVAL;
1328                 }
1329             } else {
1330                 int require = TARGET_PAGE_SIZE;
1331 #ifdef TARGET_FORCE_SHMLBA
1332                 require = t_shmlba;
1333 #endif
1334                 /*
1335                  * Include host required alignment, as otherwise we cannot
1336                  * use host shmat at all.
1337                  */
1338                 require = MAX(require, h_shmlba);
1339                 if (shmaddr & (require - 1)) {
1340                     return -TARGET_EINVAL;
1341                 }
1342             }
1343         }
1344     } else {
1345         if (shmflg & SHM_REMAP) {
1346             return -TARGET_EINVAL;
1347         }
1348     }
1349     /* All rounding now manually concluded. */
1350     shmflg &= ~SHM_RND;
1351 
1352     /* Find out the length of the shared memory segment. */
1353     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
1354     if (is_error(ret)) {
1355         /* can't get length, bail out */
1356         return ret;
1357     }
1358     t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz);
1359     h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize);
1360     m_len = MAX(t_len, h_len);
1361 
1362     if (!guest_range_valid_untagged(shmaddr, m_len)) {
1363         return -TARGET_EINVAL;
1364     }
1365 
1366     WITH_MMAP_LOCK_GUARD() {
1367         bool mapped = false;
1368         void *want, *test;
1369         abi_ulong last;
1370 
1371         if (!shmaddr) {
1372             shmaddr = mmap_find_vma(0, m_len, m_shmlba);
1373             if (shmaddr == -1) {
1374                 return -TARGET_ENOMEM;
1375             }
1376             mapped = !reserved_va;
1377         } else if (shmflg & SHM_REMAP) {
1378             /*
1379              * If host page size > target page size, the host shmat may map
1380              * more memory than the guest expects.  Reject a mapping that
1381              * would replace memory in the unexpected gap.
1382              * TODO: Could be fixed with softmmu.
1383              */
1384             if (t_len < h_len &&
1385                 !page_check_range_empty(shmaddr + t_len,
1386                                         shmaddr + h_len - 1)) {
1387                 return -TARGET_EINVAL;
1388             }
1389         } else {
1390             if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) {
1391                 return -TARGET_EINVAL;
1392             }
1393         }
1394 
1395         /* All placement is now complete. */
1396         want = (void *)g2h_untagged(shmaddr);
1397 
1398         /*
1399          * Map anonymous pages across the entire range, then remap with
1400          * the shared memory.  This is required for a number of corner
1401          * cases for which host and guest page sizes differ.
1402          */
1403         if (h_len != t_len) {
1404             int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE);
1405             int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS
1406                        | (reserved_va || mapped || (shmflg & SHM_REMAP)
1407                           ? MAP_FIXED : MAP_FIXED_NOREPLACE);
1408 
1409             test = mmap(want, m_len, mmap_p, mmap_f, -1, 0);
1410             if (unlikely(test != want)) {
1411                 /* shmat returns EINVAL, not EEXIST as mmap does. */
1412                 ret = (test == MAP_FAILED && errno != EEXIST
1413                        ? get_errno(-1) : -TARGET_EINVAL);
1414                 if (mapped) {
1415                     do_munmap(want, m_len);
1416                 }
1417                 return ret;
1418             }
1419             mapped = true;
1420         }
1421 
1422         if (reserved_va || mapped) {
1423             shmflg |= SHM_REMAP;
1424         }
1425         test = shmat(shmid, want, shmflg);
1426         if (test == MAP_FAILED) {
1427             ret = get_errno(-1);
1428             if (mapped) {
1429                 do_munmap(want, m_len);
1430             }
1431             return ret;
1432         }
1433         assert(test == want);
1434 
1435         last = shmaddr + m_len - 1;
1436         page_set_flags(shmaddr, last,
1437                        PAGE_VALID | PAGE_RESET | PAGE_READ |
1438                        (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) |
1439                        (shmflg & SHM_EXEC ? PAGE_EXEC : 0));
1440 
1441         shm_region_rm_complete(shmaddr, last);
1442         shm_region_add(shmaddr, last);
1443     }
1444 
1445     /*
1446      * We're mapping shared memory, so ensure we generate code for parallel
1447      * execution and flush old translations.  This will work up to the level
1448      * supported by the host -- anything that requires EXCP_ATOMIC will not
1449      * be atomic with respect to an external process.
1450      */
1451     if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
1452         tcg_cflags_set(cpu, CF_PARALLEL);
1453         tb_flush(cpu);
1454     }
1455 
1456     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
1457         FILE *f = qemu_log_trylock();
1458         if (f) {
1459             fprintf(f, "page layout changed following shmat\n");
1460             page_dump(f);
1461             qemu_log_unlock(f);
1462         }
1463     }
1464     return shmaddr;
1465 }
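/*
 * Editorial note: when the host and target page sizes differ (h_len !=
 * t_len), the whole MAX(t_len, h_len) range is first covered with an
 * anonymous mapping and the SysV segment is then attached over it with
 * SHM_REMAP.  This keeps the rounding gap between the host-sized and
 * target-sized views of the segment under QEMU's control, so that
 * page_set_flags() above can describe the entire guest-visible range
 * consistently.
 */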
1466 
1467 abi_long target_shmdt(abi_ulong shmaddr)
1468 {
1469     abi_long rv;
1470 
1471     /* shmdt pointers are always untagged */
1472 
1473     WITH_MMAP_LOCK_GUARD() {
1474         abi_ulong last = shm_region_find(shmaddr);
1475         if (last == 0) {
1476             return -TARGET_EINVAL;
1477         }
1478 
1479         rv = get_errno(shmdt(g2h_untagged(shmaddr)));
1480         if (rv == 0) {
1481             abi_ulong size = last - shmaddr + 1;
1482 
1483             page_set_flags(shmaddr, last, 0);
1484             shm_region_rm_complete(shmaddr, last);
1485             mmap_reserve_or_unmap(shmaddr, size);
1486         }
1487     }
1488     return rv;
1489 }
1490