/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

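/*
 * The lock count is per thread, so nested sections are safe: only the
 * outermost mmap_lock() takes the mutex.  A minimal usage sketch
 * (illustrative only, not a real call site):
 *
 *     mmap_lock();
 *     mmap_lock();             // nested: count goes 1 -> 2, no deadlock
 *     assert(have_mmap_lock());
 *     mmap_unlock();           // count 2 -> 1, mutex still held
 *     mmap_unlock();           // count 1 -> 0, mutex released
 */
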
/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

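/*
 * Sketch of the intended fork() protocol (the real call sites live in
 * the generic linux-user code; the shape below is only illustrative):
 *
 *     mmap_fork_start();       // parent thread must not already hold
 *                              // the lock, then takes the mutex
 *     pid = fork();
 *     mmap_fork_end(pid == 0); // the child reinitializes the mutex,
 *                              // since it inherited it in the locked
 *                              // state; the parent simply unlocks
 */
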
/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

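/*
 * For example (illustrative): PROT_READ | PROT_WRITE yields
 * PAGE_READ | PAGE_WRITE | PAGE_VALID, since the PROT_* and PAGE_*
 * r/w/x values coincide (which is what the PAGE_BITS mask relies on),
 * while an unknown bit such as 0x40 makes the function return 0,
 * which the callers turn into EINVAL.
 */
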
/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

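/*
 * E.g. a guest PROT_EXEC-only mapping becomes PROT_READ on the host:
 * the translator must be able to read the guest code, but the host
 * never jumps into it directly.
 */
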
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

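/*
 * Worked example of the range splitting above (illustrative numbers,
 * assuming 4K target pages on a 64K-page host): mprotecting guest range
 * [0x11000, 0x3afff] touches host pages [0x10000, 0x3ffff].  The head
 * host page at 0x10000 and the tail host page at 0x30000 each also hold
 * guest pages outside the request, so they get the OR of the old and
 * new protections, while the whole host pages in between ([0x20000,
 * 0x2ffff]) get exactly the requested protection -- at most the three
 * ranges sized above.
 */
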
/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Set the final protection. */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}

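/*
 * mmap_frag() is how target_mmap() populates a host page that the new
 * guest mapping only partially covers.  Hypothetical example: with 4K
 * guest pages on a 64K-page host, mapping guest [0x13000, 0x13fff] from
 * a file cannot use host mmap() directly (0x13000 is not host-page
 * aligned); instead the surrounding host page is made writable and the
 * file content is pread() into the fragment, leaving neighbouring guest
 * pages within the same host page intact.
 */
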
abi_ulong task_unmapped_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.  It must be called with mmap_lock() held.
 * Returns -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

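/*
 * Illustrative walk-through of the 'repeat' heuristic above, assuming
 * align = 0x10000 and a kernel that keeps suggesting 0x7f0000001000:
 *   repeat 0: retry at ROUND_UP(addr, align), the next aligned address
 *             above the kernel's suggestion;
 *   repeat 1: assume top-down allocation and retry at addr & -align;
 *   repeat 2: restart the search at low memory;
 *   otherwise: give up and return -1.
 */
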
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K
     * pages emulating a target with 8K pages, the target expects to be
     * able to access the first 8K. But the host will trap us on any
     * access beyond 4K.
     *
     * When emulating a target with a larger page size than the host's,
     * we may need to truncate file maps at EOF and add extra anonymous
     * pages up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned to the host's
             * real page size.  Additional anonymous maps will be created
             * beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

587          * Test if requested memory area fits target address space
588          * It can fail only on 64-bit host with 32-bit target.
589          * On any other target/host host mmap() handles this error correctly.
590          */
591         if (last < start || !guest_range_valid_untagged(start, len)) {
592             errno = ENOMEM;
593             goto fail;
594         }
595 
596         if (flags & MAP_FIXED_NOREPLACE) {
597             /* Validate that the chosen range is empty. */
598             if (!page_check_range_empty(start, last)) {
599                 errno = EEXIST;
600                 goto fail;
601             }
602 
603             /*
604              * With reserved_va, the entire address space is mmaped in the
605              * host to ensure it isn't accidentally used for something else.
606              * We have just checked that the guest address is not mapped
607              * within the guest, but need to replace the host reservation.
608              *
609              * Without reserved_va, despite the guest address check above,
610              * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
611              * any host address mappings.
612              */
613             if (reserved_va) {
614                 flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
615             }
616         }
617 
618         /*
619          * worst case: we cannot map the file because the offset is not
620          * aligned, so we read it
621          */
622         if (!(flags & MAP_ANONYMOUS) &&
623             (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
624             /*
625              * msync() won't work here, so we return an error if write is
626              * possible while it is a shared mapping
627              */
628             if ((flags & MAP_TYPE) == MAP_SHARED
629                 && (target_prot & PROT_WRITE)) {
630                 errno = EINVAL;
631                 goto fail;
632             }
633             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
634                                   (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
635                                   | MAP_PRIVATE | MAP_ANONYMOUS,
636                                   -1, 0);
637             if (retaddr == -1) {
638                 goto fail;
639             }
640             if (pread(fd, g2h_untagged(start), len, offset) == -1) {
641                 goto fail;
642             }
643             if (!(target_prot & PROT_WRITE)) {
644                 ret = target_mprotect(start, len, target_prot);
645                 assert(ret == 0);
646             }
647             goto the_end;
648         }
649 
650         /* handle the start of the mapping */
651         if (start > real_start) {
652             if (real_last == real_start + qemu_host_page_size - 1) {
653                 /* one single host page */
654                 if (!mmap_frag(real_start, start, last,
655                                target_prot, flags, fd, offset)) {
656                     goto fail;
657                 }
658                 goto the_end1;
659             }
660             if (!mmap_frag(real_start, start,
661                            real_start + qemu_host_page_size - 1,
662                            target_prot, flags, fd, offset)) {
663                 goto fail;
664             }
665             real_start += qemu_host_page_size;
666         }
667         /* handle the end of the mapping */
668         if (last < real_last) {
669             abi_ulong real_page = real_last - qemu_host_page_size + 1;
670             if (!mmap_frag(real_page, real_page, last,
671                            target_prot, flags, fd,
672                            offset + real_page - start)) {
673                 goto fail;
674             }
675             real_last -= qemu_host_page_size;
676         }
677 
678         /* map the middle (easier) */
679         if (real_start < real_last) {
680             void *p, *want_p;
681             off_t offset1;
682             size_t len1;
683 
684             if (flags & MAP_ANONYMOUS) {
685                 offset1 = 0;
686             } else {
687                 offset1 = offset + real_start - start;
688             }
689             len1 = real_last - real_start + 1;
690             want_p = g2h_untagged(real_start);
691 
692             p = mmap(want_p, len1, target_to_host_prot(target_prot),
693                      flags, fd, offset1);
694             if (p != want_p) {
695                 if (p != MAP_FAILED) {
696                     munmap(p, len1);
697                     errno = EEXIST;
698                 }
699                 goto fail;
700             }
701             passthrough_start = real_start;
702             passthrough_last = real_last;
703         }
704     }
705  the_end1:
706     if (flags & MAP_ANONYMOUS) {
707         page_flags |= PAGE_ANON;
708     }
709     page_flags |= PAGE_RESET;
710     if (passthrough_start > passthrough_last) {
711         page_set_flags(start, last, page_flags);
712     } else {
713         if (start < passthrough_start) {
714             page_set_flags(start, passthrough_start - 1, page_flags);
715         }
716         page_set_flags(passthrough_start, passthrough_last,
717                        page_flags | PAGE_PASSTHROUGH);
718         if (passthrough_last < last) {
719             page_set_flags(passthrough_last + 1, last, page_flags);
720         }
721     }
722  the_end:
723     trace_target_mmap_complete(start);
724     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
725         FILE *f = qemu_log_trylock();
726         if (f) {
727             fprintf(f, "page layout changed following mmap\n");
728             page_dump(f);
729             qemu_log_unlock(f);
730         }
731     }
732     mmap_unlock();
733     return start;
734 fail:
735     mmap_unlock();
736     return -1;
737 }
738 
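/*
 * PAGE_PASSTHROUGH above marks guest pages whose backing is exactly the
 * host mapping the guest asked for (same file, offset and semantics), as
 * opposed to fragments that qemu emulated with anonymous memory plus
 * pread().  target_madvise() below relies on this distinction: advice
 * such as MADV_DONTNEED is only forwarded to the host when every page in
 * the range is a passthrough page.
 */
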
static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

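/*
 * With reserved_va, the host range is not returned to the kernel but
 * re-mmapped as PROT_NONE, so the fixed guest address space reservation
 * stays intact.  A sketch of the two outcomes for a call with
 * start = 0x20000, len = 0x10000 (illustrative addresses, assuming no
 * stray guest pages on the edge host pages):
 *
 *     reserved_va != 0:  mmap(g2h(0x20000), 0x10000, PROT_NONE,
 *                             MAP_FIXED | MAP_ANONYMOUS | ...)
 *     reserved_va == 0:  munmap(g2h(0x20000), 0x10000)
 */
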
int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

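/*
 * The three branches above mirror the kernel's mremap() contract
 * (summarized, not normative): MREMAP_FIXED moves the mapping to a
 * caller-chosen address, MREMAP_MAYMOVE lets qemu pick a new guest
 * range via mmap_find_vma(), and the in-place path may only grow if,
 * under reserved_va, none of the guest pages in the grown region are
 * already in use.
 */
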
abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
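
/*
 * Example of the MADV_DONTNEED caveat above (illustrative): if the guest
 * created a private file-backed mapping that qemu had to emulate with
 * anonymous memory plus pread() (so without PAGE_PASSTHROUGH), the call
 * returns 0 here without touching the host mapping, and the guest does
 * not see the pages repopulated from the file as it would under a real
 * kernel.
 */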