xref: /qemu/linux-user/mmap.c (revision f2b901098e14ad1aaffab82464917b8679499cc5)
1  /*
2   *  mmap support for qemu
3   *
4   *  Copyright (c) 2003 Fabrice Bellard
5   *
6   *  This program is free software; you can redistribute it and/or modify
7   *  it under the terms of the GNU General Public License as published by
8   *  the Free Software Foundation; either version 2 of the License, or
9   *  (at your option) any later version.
10   *
11   *  This program is distributed in the hope that it will be useful,
12   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   *  GNU General Public License for more details.
15   *
16   *  You should have received a copy of the GNU General Public License
17   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18   */
19  #include "qemu/osdep.h"
20  #include "trace.h"
21  #include "exec/log.h"
22  #include "qemu.h"
23  #include "user-internals.h"
24  #include "user-mmap.h"
25  #include "target_mman.h"
26  
27  static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
28  static __thread int mmap_lock_count;
29  
30  void mmap_lock(void)
31  {
32      if (mmap_lock_count++ == 0) {
33          pthread_mutex_lock(&mmap_mutex);
34      }
35  }
36  
37  void mmap_unlock(void)
38  {
39      if (--mmap_lock_count == 0) {
40          pthread_mutex_unlock(&mmap_mutex);
41      }
42  }
43  
44  bool have_mmap_lock(void)
45  {
46      return mmap_lock_count > 0;
47  }
48  
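/*
 * Illustration of the counted-lock pattern above (hypothetical caller,
 * not a specific QEMU call site): the depth is tracked per thread, so
 * helpers that need the lock may be called whether or not the caller
 * already holds it.
 *
 *     mmap_lock();       // count 0 -> 1, mutex acquired
 *     assert(have_mmap_lock());
 *     mmap_lock();       // count 1 -> 2, no mutex operation
 *     mmap_unlock();     // count 2 -> 1, still held
 *     mmap_unlock();     // count 1 -> 0, mutex released
 */
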
49  /* Grab lock to make sure things are in a consistent state after fork().  */
50  void mmap_fork_start(void)
51  {
52      if (mmap_lock_count)
53          abort();
54      pthread_mutex_lock(&mmap_mutex);
55  }
56  
57  void mmap_fork_end(int child)
58  {
59      if (child)
60          pthread_mutex_init(&mmap_mutex, NULL);
61      else
62          pthread_mutex_unlock(&mmap_mutex);
63  }
64  
65  /*
66   * Validate target prot bitmask.
67   * Return the prot bitmask for the host in *HOST_PROT.
68   * Return 0 if the target prot bitmask is invalid, otherwise
69   * the internal qemu page_flags (which will include PAGE_VALID).
70   */
71  static int validate_prot_to_pageflags(int *host_prot, int prot)
72  {
73      int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
74      int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
75  
76      /*
77       * For the host, we need not pass anything except read/write/exec.
78       * While PROT_SEM is allowed by all hosts, it is also ignored, so
79       * don't bother transforming guest bit to host bit.  Any other
80       * target-specific prot bits will not be understood by the host
81       * and will need to be encoded into page_flags for qemu emulation.
82       *
83       * Pages that are executable by the guest will never be executed
84       * by the host, but the host will need to be able to read them.
85       */
86      *host_prot = (prot & (PROT_READ | PROT_WRITE))
87                 | (prot & PROT_EXEC ? PROT_READ : 0);
88  
89  #ifdef TARGET_AARCH64
90      {
91          ARMCPU *cpu = ARM_CPU(thread_cpu);
92  
93          /*
94           * The PROT_BTI bit is only accepted if the cpu supports the feature.
95           * Since this is the unusual case, don't bother checking unless
96           * the bit has been requested.  If set and valid, record the bit
97           * within QEMU's page_flags.
98           */
99          if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
100              valid |= TARGET_PROT_BTI;
101              page_flags |= PAGE_BTI;
102          }
103          /* Similarly for the PROT_MTE bit. */
104          if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
105              valid |= TARGET_PROT_MTE;
106              page_flags |= PAGE_MTE;
107          }
108      }
109  #elif defined(TARGET_HPPA)
110      valid |= PROT_GROWSDOWN | PROT_GROWSUP;
111  #endif
112  
113      return prot & ~valid ? 0 : page_flags;
114  }
115  
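/*
 * Worked example for the helper above (values for illustration only):
 * a guest request of PROT_READ | PROT_EXEC yields
 *
 *     *host_prot   = PROT_READ                          // guest code is
 *                                                       // read, never run
 *     return value = PAGE_READ | PAGE_EXEC | PAGE_VALID
 *
 * whereas any bit outside the 'valid' mask makes the function return 0,
 * which the callers turn into -TARGET_EINVAL / EINVAL.
 */
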
116  /* NOTE: all the constants are the HOST ones, but addresses are target. */
117  int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
118  {
119      abi_ulong end, host_start, host_end, addr;
120      int prot1, ret, page_flags, host_prot;
121  
122      trace_target_mprotect(start, len, target_prot);
123  
124      if ((start & ~TARGET_PAGE_MASK) != 0) {
125          return -TARGET_EINVAL;
126      }
127      page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
128      if (!page_flags) {
129          return -TARGET_EINVAL;
130      }
131      len = TARGET_PAGE_ALIGN(len);
132      end = start + len;
133      if (!guest_range_valid_untagged(start, len)) {
134          return -TARGET_ENOMEM;
135      }
136      if (len == 0) {
137          return 0;
138      }
139  
140      mmap_lock();
141      host_start = start & qemu_host_page_mask;
142      host_end = HOST_PAGE_ALIGN(end);
143      if (start > host_start) {
144          /* handle host page containing start */
145          prot1 = host_prot;
146          for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
147              prot1 |= page_get_flags(addr);
148          }
149          if (host_end == host_start + qemu_host_page_size) {
150              for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
151                  prot1 |= page_get_flags(addr);
152              }
153              end = host_end;
154          }
155          ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
156                         prot1 & PAGE_BITS);
157          if (ret != 0) {
158              goto error;
159          }
160          host_start += qemu_host_page_size;
161      }
162      if (end < host_end) {
163          prot1 = host_prot;
164          for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
165              prot1 |= page_get_flags(addr);
166          }
167          ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
168                         qemu_host_page_size, prot1 & PAGE_BITS);
169          if (ret != 0) {
170              goto error;
171          }
172          host_end -= qemu_host_page_size;
173      }
174  
175      /* handle the pages in the middle */
176      if (host_start < host_end) {
177          ret = mprotect(g2h_untagged(host_start),
178                         host_end - host_start, host_prot);
179          if (ret != 0) {
180              goto error;
181          }
182      }
183  
184      page_set_flags(start, start + len, page_flags);
185      ret = 0;
186  
187  error:
188      mmap_unlock();
189      return ret;
190  }
191  
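/*
 * Sketch of the split done by target_mprotect() when target pages are
 * smaller than host pages (sizes purely illustrative: 4K target pages on
 * a 64K host-page kernel).  A request for [0x11000, 0x13000) only
 * partially covers the host page at 0x10000, so that one host page is
 * given the union of the new protection and the flags of the untouched
 * guest pages around the range, while page_set_flags() records the exact
 * guest-visible protection of the 4K pages inside it.
 */
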
192  /* map an incomplete host page */
193  static int mmap_frag(abi_ulong real_start,
194                       abi_ulong start, abi_ulong end,
195                       int prot, int flags, int fd, abi_ulong offset)
196  {
197      abi_ulong real_end, addr;
198      void *host_start;
199      int prot1, prot_new;
200  
201      real_end = real_start + qemu_host_page_size;
202      host_start = g2h_untagged(real_start);
203  
204      /* get the protection of the target pages outside the mapping */
205      prot1 = 0;
206      for (addr = real_start; addr < real_end; addr++) {
207          if (addr < start || addr >= end)
208              prot1 |= page_get_flags(addr);
209      }
210  
211      if (prot1 == 0) {
212          /* no page was there, so we allocate one */
213          void *p = mmap(host_start, qemu_host_page_size, prot,
214                         flags | MAP_ANONYMOUS, -1, 0);
215          if (p == MAP_FAILED)
216              return -1;
217          prot1 = prot;
218      }
219      prot1 &= PAGE_BITS;
220  
221      prot_new = prot | prot1;
222      if (!(flags & MAP_ANONYMOUS)) {
223          /* msync() won't work here, so we return an error if write is
224             possible while it is a shared mapping */
225          if ((flags & MAP_TYPE) == MAP_SHARED &&
226              (prot & PROT_WRITE))
227              return -1;
228  
229          /* adjust protection to be able to read */
230          if (!(prot1 & PROT_WRITE))
231              mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
232  
233          /* read the corresponding file data */
234          if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
235              return -1;
236  
237          /* put final protection */
238          if (prot_new != (prot1 | PROT_WRITE))
239              mprotect(host_start, qemu_host_page_size, prot_new);
240      } else {
241          if (prot_new != prot1) {
242              mprotect(host_start, qemu_host_page_size, prot_new);
243          }
244          if (prot_new & PROT_WRITE) {
245              memset(g2h_untagged(start), 0, end - start);
246          }
247      }
248      return 0;
249  }
250  
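/*
 * mmap_frag() is the helper used below by target_mmap() whenever a fixed
 * request starts or ends in the middle of a host page.  A rough picture
 * (sizes are examples only: 4K target pages inside one 64K host page):
 *
 *     host page :  |------------------ 64K ------------------|
 *     requested :                   |##### new #####|
 *     existing  :  |= other guest pages =|
 *
 * The host page can carry only one mapping and one protection, so the
 * fragment is either backed by a fresh anonymous page or, for file
 * mappings, filled in with pread() (which is why a writable MAP_SHARED
 * fragment has to be rejected), and the host protection becomes the
 * union of the old and new target-page protections.
 */
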
251  #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
252  #ifdef TARGET_AARCH64
253  # define TASK_UNMAPPED_BASE  0x5500000000
254  #else
255  # define TASK_UNMAPPED_BASE  (1ul << 38)
256  #endif
257  #else
258  #ifdef TARGET_HPPA
259  # define TASK_UNMAPPED_BASE  0xfa000000
260  #else
261  # define TASK_UNMAPPED_BASE  0x40000000
262  #endif
263  #endif
264  abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
265  
266  unsigned long last_brk;
267  
268  /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
269     of guest address space.  */
270  static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
271                                          abi_ulong align)
272  {
273      abi_ulong addr, end_addr, incr = qemu_host_page_size;
274      int prot;
275      bool looped = false;
276  
277      if (size > reserved_va) {
278          return (abi_ulong)-1;
279      }
280  
281      /* Note that start and size have already been aligned by mmap_find_vma. */
282  
283      end_addr = start + size;
284      if (start > reserved_va - size) {
285          /* Start at the top of the address space.  */
286          end_addr = ((reserved_va - size) & -align) + size;
287          looped = true;
288      }
289  
290      /* Search downward from END_ADDR, checking to see if a page is in use.  */
291      addr = end_addr;
292      while (1) {
293          addr -= incr;
294          if (addr > end_addr) {
295              if (looped) {
296                  /* Failure.  The entire address space has been searched.  */
297                  return (abi_ulong)-1;
298              }
299              /* Re-start at the top of the address space.  */
300              addr = end_addr = ((reserved_va - size) & -align) + size;
301              looped = true;
302          } else {
303              prot = page_get_flags(addr);
304              if (prot) {
305                  /* Page in use.  Restart below this page.  */
306                  addr = end_addr = ((addr - size) & -align) + size;
307              } else if (addr && addr + size == end_addr) {
308                  /* Success!  All pages between ADDR and END_ADDR are free.  */
309                  if (start == mmap_next_start) {
310                      mmap_next_start = addr;
311                  }
312                  return addr;
313              }
314          }
315      }
316  }
317  
318  /*
319   * Find and reserve a free memory area of size 'size'. The search
320   * starts at 'start'.
321   * It must be called with mmap_lock() held.
322   * Return -1 if error.
323   */
324  abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
325  {
326      void *ptr, *prev;
327      abi_ulong addr;
328      int wrapped, repeat;
329  
330      align = MAX(align, qemu_host_page_size);
331  
332      /* If 'start' == 0, then a default start address is used. */
333      if (start == 0) {
334          start = mmap_next_start;
335      } else {
336          start &= qemu_host_page_mask;
337      }
338      start = ROUND_UP(start, align);
339  
340      size = HOST_PAGE_ALIGN(size);
341  
342      if (reserved_va) {
343          return mmap_find_vma_reserved(start, size, align);
344      }
345  
346      addr = start;
347      wrapped = repeat = 0;
348      prev = 0;
349  
350      for (;; prev = ptr) {
351          /*
352           * Reserve needed memory area to avoid a race.
353           * It should be discarded using:
354           *  - mmap() with MAP_FIXED flag
355           *  - mremap() with MREMAP_FIXED flag
356           *  - shmat() with SHM_REMAP flag
357           */
358          ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
359                     MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
360  
361          /* ENOMEM if the host address space has no memory available */
362          if (ptr == MAP_FAILED) {
363              return (abi_ulong)-1;
364          }
365  
366          /* Count the number of sequential returns of the same address.
367             This is used to modify the search algorithm below.  */
368          repeat = (ptr == prev ? repeat + 1 : 0);
369  
370          if (h2g_valid(ptr + size - 1)) {
371              addr = h2g(ptr);
372  
373              if ((addr & (align - 1)) == 0) {
374                  /* Success.  */
375                  if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
376                      mmap_next_start = addr + size;
377                  }
378                  return addr;
379              }
380  
381              /* The address is not properly aligned for the target.  */
382              switch (repeat) {
383              case 0:
384                  /* Assume the result that the kernel gave us is the
385                     first with enough free space, so start again at the
386                     next higher target page.  */
387                  addr = ROUND_UP(addr, align);
388                  break;
389              case 1:
390                  /* Sometimes the kernel decides to perform the allocation
391                     at the top end of memory instead.  */
392                  addr &= -align;
393                  break;
394              case 2:
395                  /* Start over at low memory.  */
396                  addr = 0;
397                  break;
398              default:
399                  /* Fail.  This unaligned block must be the last.  */
400                  addr = -1;
401                  break;
402              }
403          } else {
404              /* Since the result the kernel gave didn't fit, start
405                 again at low memory.  If this is already a repeat, fail.  */
406              addr = (repeat ? -1 : 0);
407          }
408  
409          /* Unmap and try again.  */
410          munmap(ptr, size);
411  
412          /* ENOMEM if we checked the whole of the target address space.  */
413          if (addr == (abi_ulong)-1) {
414              return (abi_ulong)-1;
415          } else if (addr == 0) {
416              if (wrapped) {
417                  return (abi_ulong)-1;
418              }
419              wrapped = 1;
420              /* Don't actually use 0 when wrapping, instead indicate
421                 that we'd truly like an allocation in low memory.  */
422              addr = (mmap_min_addr > TARGET_PAGE_SIZE
423                       ? TARGET_PAGE_ALIGN(mmap_min_addr)
424                       : TARGET_PAGE_SIZE);
425          } else if (wrapped && addr >= start) {
426              return (abi_ulong)-1;
427          }
428      }
429  }
430  
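/*
 * How the callers in this file use it: target_mmap() below requests
 *
 *     start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
 *
 * for non-MAP_FIXED mappings, and target_mremap() calls it with
 * start == 0 so the search begins at mmap_next_start.  The returned
 * address is only a reservation (a PROT_NONE mapping, or a free hole in
 * the reserved_va block); the caller is expected to replace it right
 * away with a MAP_FIXED mapping.
 */
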
431  /* NOTE: all the constants are the HOST ones */
432  abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
433                       int flags, int fd, abi_ulong offset)
434  {
435      abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
436                passthrough_start = -1, passthrough_end = -1;
437      int page_flags, host_prot;
438  
439      mmap_lock();
440      trace_target_mmap(start, len, target_prot, flags, fd, offset);
441  
442      if (!len) {
443          errno = EINVAL;
444          goto fail;
445      }
446  
447      page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
448      if (!page_flags) {
449          errno = EINVAL;
450          goto fail;
451      }
452  
453      /* Also check for overflows... */
454      len = TARGET_PAGE_ALIGN(len);
455      if (!len) {
456          errno = ENOMEM;
457          goto fail;
458      }
459  
460      if (offset & ~TARGET_PAGE_MASK) {
461          errno = EINVAL;
462          goto fail;
463      }
464  
465      /*
466       * If we're mapping shared memory, ensure we generate code for parallel
467       * execution and flush old translations.  This will work up to the level
468       * supported by the host -- anything that requires EXCP_ATOMIC will not
469       * be atomic with respect to an external process.
470       */
471      if (flags & MAP_SHARED) {
472          CPUState *cpu = thread_cpu;
473          if (!(cpu->tcg_cflags & CF_PARALLEL)) {
474              cpu->tcg_cflags |= CF_PARALLEL;
475              tb_flush(cpu);
476          }
477      }
478  
479      real_start = start & qemu_host_page_mask;
480      host_offset = offset & qemu_host_page_mask;
481  
482      /* If the user is asking for the kernel to find a location, do that
483         before we truncate the length for mapping files below.  */
484      if (!(flags & MAP_FIXED)) {
485          host_len = len + offset - host_offset;
486          host_len = HOST_PAGE_ALIGN(host_len);
487          start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
488          if (start == (abi_ulong)-1) {
489              errno = ENOMEM;
490              goto fail;
491          }
492      }
493  
494      /* When mapping files into a memory area larger than the file, accesses
495         to pages beyond the file size will cause a SIGBUS.
496  
497         For example, if mmapping a file of 100 bytes on a host with 4K
498         emulating a target with 8K pages, the target expects to be able to
499         access the first 8K. But the host will trap us on any access beyond
500         4K.
501  
502         When emulating a target with a larger page size than the host's, we
503         may need to truncate file maps at EOF and add extra anonymous pages
504         up to the target's page boundary.  */
505  
506      if ((qemu_real_host_page_size() < qemu_host_page_size) &&
507          !(flags & MAP_ANONYMOUS)) {
508          struct stat sb;
509  
510          if (fstat(fd, &sb) == -1)
511              goto fail;
512  
513          /* Are we trying to create a map beyond EOF?  */
514          if (offset + len > sb.st_size) {
515              /* If so, truncate the file map at EOF, aligned to the host's
516                 real page size.  Additional anonymous maps will be created
517                 beyond EOF.  */
518              len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
519          }
520      }
521  
522      if (!(flags & MAP_FIXED)) {
523          unsigned long host_start;
524          void *p;
525  
526          host_len = len + offset - host_offset;
527          host_len = HOST_PAGE_ALIGN(host_len);
528  
529          /* Note: we prefer to control the mapping address. It is
530             especially important if qemu_host_page_size >
531             qemu_real_host_page_size */
532          p = mmap(g2h_untagged(start), host_len, host_prot,
533                   flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
534          if (p == MAP_FAILED) {
535              goto fail;
536          }
537          /* update start so that it points to the file position at 'offset' */
538          host_start = (unsigned long)p;
539          if (!(flags & MAP_ANONYMOUS)) {
540              p = mmap(g2h_untagged(start), len, host_prot,
541                       flags | MAP_FIXED, fd, host_offset);
542              if (p == MAP_FAILED) {
543                  munmap(g2h_untagged(start), host_len);
544                  goto fail;
545              }
546              host_start += offset - host_offset;
547          }
548          start = h2g(host_start);
549          passthrough_start = start;
550          passthrough_end = start + len;
551      } else {
552          if (start & ~TARGET_PAGE_MASK) {
553              errno = EINVAL;
554              goto fail;
555          }
556          end = start + len;
557          real_end = HOST_PAGE_ALIGN(end);
558  
559          /*
560           * Test if the requested memory area fits the target address space.
561           * It can fail only on a 64-bit host with a 32-bit target.  On any
562           * other target/host combination, host mmap() handles this error correctly.
563           */
564          if (end < start || !guest_range_valid_untagged(start, len)) {
565              errno = ENOMEM;
566              goto fail;
567          }
568  
569          /* worst case: we cannot map the file because the offset is not
570             aligned, so we read it */
571          if (!(flags & MAP_ANONYMOUS) &&
572              (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
573              /* msync() won't work here, so we return an error if write is
574                 possible while it is a shared mapping */
575              if ((flags & MAP_TYPE) == MAP_SHARED &&
576                  (host_prot & PROT_WRITE)) {
577                  errno = EINVAL;
578                  goto fail;
579              }
580              retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
581                                    MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
582                                    -1, 0);
583              if (retaddr == -1)
584                  goto fail;
585              if (pread(fd, g2h_untagged(start), len, offset) == -1)
586                  goto fail;
587              if (!(host_prot & PROT_WRITE)) {
588                  ret = target_mprotect(start, len, target_prot);
589                  assert(ret == 0);
590              }
591              goto the_end;
592          }
593  
594          /* handle the start of the mapping */
595          if (start > real_start) {
596              if (real_end == real_start + qemu_host_page_size) {
597                  /* one single host page */
598                  ret = mmap_frag(real_start, start, end,
599                                  host_prot, flags, fd, offset);
600                  if (ret == -1)
601                      goto fail;
602                  goto the_end1;
603              }
604              ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
605                              host_prot, flags, fd, offset);
606              if (ret == -1)
607                  goto fail;
608              real_start += qemu_host_page_size;
609          }
610          /* handle the end of the mapping */
611          if (end < real_end) {
612              ret = mmap_frag(real_end - qemu_host_page_size,
613                              real_end - qemu_host_page_size, end,
614                              host_prot, flags, fd,
615                              offset + real_end - qemu_host_page_size - start);
616              if (ret == -1)
617                  goto fail;
618              real_end -= qemu_host_page_size;
619          }
620  
621          /* map the middle (easier) */
622          if (real_start < real_end) {
623              void *p;
624              unsigned long offset1;
625              if (flags & MAP_ANONYMOUS)
626                  offset1 = 0;
627              else
628                  offset1 = offset + real_start - start;
629              p = mmap(g2h_untagged(real_start), real_end - real_start,
630                       host_prot, flags, fd, offset1);
631              if (p == MAP_FAILED)
632                  goto fail;
633              passthrough_start = real_start;
634              passthrough_end = real_end;
635          }
636      }
637   the_end1:
638      if (flags & MAP_ANONYMOUS) {
639          page_flags |= PAGE_ANON;
640      }
641      page_flags |= PAGE_RESET;
642      if (passthrough_start == passthrough_end) {
643          page_set_flags(start, start + len, page_flags);
644      } else {
645          if (start < passthrough_start) {
646              page_set_flags(start, passthrough_start, page_flags);
647          }
648          page_set_flags(passthrough_start, passthrough_end,
649                         page_flags | PAGE_PASSTHROUGH);
650          if (passthrough_end < start + len) {
651              page_set_flags(passthrough_end, start + len, page_flags);
652          }
653      }
654   the_end:
655      trace_target_mmap_complete(start);
656      if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
657          FILE *f = qemu_log_trylock();
658          if (f) {
659              fprintf(f, "page layout changed following mmap\n");
660              page_dump(f);
661              qemu_log_unlock(f);
662          }
663      }
664      mmap_unlock();
665      return start;
666  fail:
667      mmap_unlock();
668      return -1;
669  }
670  
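/*
 * The passthrough_start/passthrough_end window computed above marks the
 * sub-range that was handed to the host mmap() unmodified; fragments that
 * had to be emulated with an anonymous page plus pread() fall outside it.
 * For example (addresses illustrative only), a MAP_FIXED file mapping of
 * [0x11000, 0x32000) on a 64K-host-page system may end up as
 *
 *     [0x11000, 0x20000)  mmap_frag()  -> no PAGE_PASSTHROUGH
 *     [0x20000, 0x30000)  host mmap()  -> PAGE_PASSTHROUGH
 *     [0x30000, 0x32000)  mmap_frag()  -> no PAGE_PASSTHROUGH
 *
 * target_madvise() later uses this flag to decide whether MADV_DONTNEED
 * and friends can be passed straight to the host.
 */
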
671  static void mmap_reserve(abi_ulong start, abi_ulong size)
672  {
673      abi_ulong real_start;
674      abi_ulong real_end;
675      abi_ulong addr;
676      abi_ulong end;
677      int prot;
678  
679      real_start = start & qemu_host_page_mask;
680      real_end = HOST_PAGE_ALIGN(start + size);
681      end = start + size;
682      if (start > real_start) {
683          /* handle host page containing start */
684          prot = 0;
685          for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
686              prot |= page_get_flags(addr);
687          }
688          if (real_end == real_start + qemu_host_page_size) {
689              for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
690                  prot |= page_get_flags(addr);
691              }
692              end = real_end;
693          }
694          if (prot != 0)
695              real_start += qemu_host_page_size;
696      }
697      if (end < real_end) {
698          prot = 0;
699          for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
700              prot |= page_get_flags(addr);
701          }
702          if (prot != 0)
703              real_end -= qemu_host_page_size;
704      }
705      if (real_start != real_end) {
706          mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
707                   MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
708                   -1, 0);
709      }
710  }
711  
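/*
 * With reserved_va (the -R option) the whole guest address space stays
 * mmap'ed for the life of the process, so "unmapping" is implemented by
 * mmap_reserve() above: the range is replaced with a fresh PROT_NONE,
 * MAP_NORESERVE anonymous mapping instead of being returned to the host
 * with munmap().  target_munmap() and target_mremap() below choose
 * between the two behaviours based on reserved_va.
 */
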
712  int target_munmap(abi_ulong start, abi_ulong len)
713  {
714      abi_ulong end, real_start, real_end, addr;
715      int prot, ret;
716  
717      trace_target_munmap(start, len);
718  
719      if (start & ~TARGET_PAGE_MASK)
720          return -TARGET_EINVAL;
721      len = TARGET_PAGE_ALIGN(len);
722      if (len == 0 || !guest_range_valid_untagged(start, len)) {
723          return -TARGET_EINVAL;
724      }
725  
726      mmap_lock();
727      end = start + len;
728      real_start = start & qemu_host_page_mask;
729      real_end = HOST_PAGE_ALIGN(end);
730  
731      if (start > real_start) {
732          /* handle host page containing start */
733          prot = 0;
734          for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
735              prot |= page_get_flags(addr);
736          }
737          if (real_end == real_start + qemu_host_page_size) {
738              for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
739                  prot |= page_get_flags(addr);
740              }
741              end = real_end;
742          }
743          if (prot != 0)
744              real_start += qemu_host_page_size;
745      }
746      if (end < real_end) {
747          prot = 0;
748          for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
749              prot |= page_get_flags(addr);
750          }
751          if (prot != 0)
752              real_end -= qemu_host_page_size;
753      }
754  
755      ret = 0;
756      /* unmap what we can */
757      if (real_start < real_end) {
758          if (reserved_va) {
759              mmap_reserve(real_start, real_end - real_start);
760          } else {
761              ret = munmap(g2h_untagged(real_start), real_end - real_start);
762          }
763      }
764  
765      if (ret == 0) {
766          page_set_flags(start, start + len, 0);
767      }
768      mmap_unlock();
769      return ret;
770  }
771  
772  abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
773                         abi_ulong new_size, unsigned long flags,
774                         abi_ulong new_addr)
775  {
776      int prot;
777      void *host_addr;
778  
779      if (!guest_range_valid_untagged(old_addr, old_size) ||
780          ((flags & MREMAP_FIXED) &&
781           !guest_range_valid_untagged(new_addr, new_size)) ||
782          ((flags & MREMAP_MAYMOVE) == 0 &&
783           !guest_range_valid_untagged(old_addr, new_size))) {
784          errno = ENOMEM;
785          return -1;
786      }
787  
788      mmap_lock();
789  
790      if (flags & MREMAP_FIXED) {
791          host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
792                             flags, g2h_untagged(new_addr));
793  
794          if (reserved_va && host_addr != MAP_FAILED) {
795              /* If new and old addresses overlap then the above mremap will
796                 already have failed with EINVAL.  */
797              mmap_reserve(old_addr, old_size);
798          }
799      } else if (flags & MREMAP_MAYMOVE) {
800          abi_ulong mmap_start;
801  
802          mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
803  
804          if (mmap_start == -1) {
805              errno = ENOMEM;
806              host_addr = MAP_FAILED;
807          } else {
808              host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
809                                 flags | MREMAP_FIXED,
810                                 g2h_untagged(mmap_start));
811              if (reserved_va) {
812                  mmap_reserve(old_addr, old_size);
813              }
814          }
815      } else {
816          int prot = 0;
817          if (reserved_va && old_size < new_size) {
818              abi_ulong addr;
819              for (addr = old_addr + old_size;
820                   addr < old_addr + new_size;
821                   addr++) {
822                  prot |= page_get_flags(addr);
823              }
824          }
825          if (prot == 0) {
826              host_addr = mremap(g2h_untagged(old_addr),
827                                 old_size, new_size, flags);
828  
829              if (host_addr != MAP_FAILED) {
830                  /* Check if address fits target address space */
831                  if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
832                      /* Revert mremap() changes */
833                      host_addr = mremap(g2h_untagged(old_addr),
834                                         new_size, old_size, flags);
835                      errno = ENOMEM;
836                      host_addr = MAP_FAILED;
837                  } else if (reserved_va && old_size > new_size) {
838                      mmap_reserve(old_addr + old_size, old_size - new_size);
839                  }
840              }
841          } else {
842              errno = ENOMEM;
843              host_addr = MAP_FAILED;
844          }
845      }
846  
847      if (host_addr == MAP_FAILED) {
848          new_addr = -1;
849      } else {
850          new_addr = h2g(host_addr);
851          prot = page_get_flags(old_addr);
852          page_set_flags(old_addr, old_addr + old_size, 0);
853          page_set_flags(new_addr, new_addr + new_size,
854                         prot | PAGE_VALID | PAGE_RESET);
855      }
856      mmap_unlock();
857      return new_addr;
858  }
859  
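/*
 * Summary of the three cases handled above (guest calls shown only for
 * illustration):
 *
 *     mremap(old, osz, nsz, MREMAP_MAYMOVE | MREMAP_FIXED, new)
 *         -> host mremap() straight to g2h(new); with reserved_va the
 *            old range is handed back to mmap_reserve().
 *     mremap(old, osz, nsz, MREMAP_MAYMOVE)
 *         -> a destination is picked with mmap_find_vma() and the call
 *            is turned into MREMAP_FIXED towards that address.
 *     mremap(old, osz, nsz, 0)
 *         -> in-place resize; with reserved_va, growth is refused
 *            (ENOMEM) if the guest pages just past the old mapping are
 *            already in use.
 *
 * On success the guest page flags are moved from the old range to the
 * new one with PAGE_VALID | PAGE_RESET added.
 */
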
860  static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
861  {
862      ulong addr;
863  
864      if ((start | end) & ~qemu_host_page_mask) {
865          return false;
866      }
867  
868      for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
869          if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
870              return false;
871          }
872      }
873  
874      return true;
875  }
876  
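/*
 * Example (boundaries illustrative): with 4K target pages on a 64K-page
 * host, a range such as [0x20000, 0x30000) passes this check only if both
 * ends are 64K aligned *and* every 4K page inside it carries
 * PAGE_PASSTHROUGH, i.e. it is backed one-to-one by a host mapping rather
 * than by the pread()-filled fragments created in mmap_frag().
 */
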
877  abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
878  {
879      abi_ulong len, end;
880      int ret = 0;
881  
882      if (start & ~TARGET_PAGE_MASK) {
883          return -TARGET_EINVAL;
884      }
885      len = TARGET_PAGE_ALIGN(len_in);
886  
887      if (len_in && !len) {
888          return -TARGET_EINVAL;
889      }
890  
891      end = start + len;
892      if (end < start) {
893          return -TARGET_EINVAL;
894      }
895  
896      if (end == start) {
897          return 0;
898      }
899  
900      if (!guest_range_valid_untagged(start, len)) {
901          return -TARGET_EINVAL;
902      }
903  
904      /* Translate for some architectures which have different MADV_xxx values */
905      switch (advice) {
906      case TARGET_MADV_DONTNEED:      /* alpha */
907          advice = MADV_DONTNEED;
908          break;
909      case TARGET_MADV_WIPEONFORK:    /* parisc */
910          advice = MADV_WIPEONFORK;
911          break;
912      case TARGET_MADV_KEEPONFORK:    /* parisc */
913          advice = MADV_KEEPONFORK;
914          break;
915      /* we do not care about the other MADV_xxx values yet */
916      }
917  
918      /*
919       * Most advice values are hints, so ignoring and returning success is ok.
920       *
921       * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
922       * MADV_KEEPONFORK are not hints and need to be emulated.
923       *
924       * A straight passthrough for those may not be safe because qemu sometimes
925       * turns private file-backed mappings into anonymous mappings.
926       * can_passthrough_madvise() helps to check if a passthrough is possible by
927       * comparing mappings that are known to have the same semantics in the host
928       * and the guest. In this case passthrough is safe.
929       *
930       * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
931       * return failure if not.
932       *
933       * MADV_DONTNEED is passed through as well, if possible.
934       * If passthrough isn't possible, we nevertheless (wrongly!) return
935       * success, which is broken but some userspace programs fail to work
936       * otherwise. Completely implementing such emulation is quite complicated
937       * though.
938       */
939      mmap_lock();
940      switch (advice) {
941      case MADV_WIPEONFORK:
942      case MADV_KEEPONFORK:
943          ret = -EINVAL;
944          /* fall through */
945      case MADV_DONTNEED:
946          if (can_passthrough_madvise(start, end)) {
947              ret = get_errno(madvise(g2h_untagged(start), len, advice));
948              if ((advice == MADV_DONTNEED) && (ret == 0)) {
949                  page_reset_target_data(start, start + len);
950              }
951          }
952      }
953      mmap_unlock();
954  
955      return ret;
956  }
957
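
/*
 * Resulting behaviour of target_madvise() (condensed from the logic above):
 *
 *     MADV_DONTNEED    on a passthrough range -> host madvise(), target
 *                                                data reset
 *     MADV_DONTNEED    elsewhere              -> pretends to succeed
 *     MADV_WIPEONFORK / MADV_KEEPONFORK
 *                      on a passthrough range -> host madvise()
 *                      elsewhere              -> -EINVAL
 *     any other advice                        -> ignored, returns 0
 */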