1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Juniper Networks, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/mman.h>
31 #include <sys/stat.h>
32 #include <assert.h>
33 #include <err.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <paths.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42
43 #include "image.h"
44 #include "mkimg.h"
45
46 #ifndef MAP_NOCORE
47 #define MAP_NOCORE 0
48 #endif
49 #ifndef MAP_NOSYNC
50 #define MAP_NOSYNC 0
51 #endif
52
53 #ifndef SEEK_DATA
54 #define SEEK_DATA -1
55 #endif
56 #ifndef SEEK_HOLE
57 #define SEEK_HOLE -1
58 #endif
59
60 struct chunk {
61 TAILQ_ENTRY(chunk) ch_list;
62 size_t ch_size; /* Size of chunk in bytes. */
63 lba_t ch_block; /* Block address in image. */
64 union {
65 struct {
66 off_t ofs; /* Offset in backing file. */
67 int fd; /* FD of backing file. */
68 } file;
69 struct {
70 void *ptr; /* Pointer to data in memory */
71 } mem;
72 } ch_u;
73 u_int ch_type;
74 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */
75 #define CH_TYPE_FILE 1 /* File-backed chunk. */
76 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */
77 };
78
/*
 * All chunks that make up the image, plus a count of them.  Chunks are
 * only appended at the tail or split in place, so the list stays in
 * ascending block order.
 */
static TAILQ_HEAD(chunk_head, chunk) image_chunks;
static u_int image_nchunks;

/*
 * Swap file state: path of the temporary file, its descriptor (-1 until
 * created), the system page size, and the number of bytes handed out so
 * far by image_swap_alloc().
 */
static char image_swap_file[PATH_MAX];
static int image_swap_fd = -1;
static u_int image_swap_pgsz;
static off_t image_swap_size;

/* Image size in blocks, as recorded by image_set_size(). */
static lba_t image_size;
88
89 static int
is_empty_sector(void * buf)90 is_empty_sector(void *buf)
91 {
92 uint64_t *p = buf;
93 size_t n, max;
94
95 assert(((uintptr_t)p & 3) == 0);
96
97 max = secsz / sizeof(uint64_t);
98 for (n = 0; n < max; n++) {
99 if (p[n] != 0UL)
100 return (0);
101 }
102 return (1);
103 }
104
/*
 * Swap file handling.
 */
108
109 static off_t
image_swap_alloc(size_t size)110 image_swap_alloc(size_t size)
111 {
112 off_t ofs;
113 size_t unit;
114
115 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
116 assert((unit & (unit - 1)) == 0);
117
118 size = (size + unit - 1) & ~(unit - 1);
119
120 ofs = image_swap_size;
121 image_swap_size += size;
122 if (ftruncate(image_swap_fd, image_swap_size) == -1) {
123 image_swap_size = ofs;
124 ofs = -1LL;
125 }
126 return (ofs);
127 }
128
129 /*
130 * Image chunk handling.
131 */
132
133 static struct chunk *
image_chunk_find(lba_t blk)134 image_chunk_find(lba_t blk)
135 {
136 static struct chunk *last = NULL;
137 struct chunk *ch;
138
139 ch = (last != NULL && last->ch_block <= blk)
140 ? last : TAILQ_FIRST(&image_chunks);
141 while (ch != NULL) {
142 if (ch->ch_block <= blk &&
143 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
144 last = ch;
145 break;
146 }
147 ch = TAILQ_NEXT(ch, ch_list);
148 }
149 return (ch);
150 }
151
152 static size_t
image_chunk_grow(struct chunk * ch,size_t sz)153 image_chunk_grow(struct chunk *ch, size_t sz)
154 {
155 size_t dsz, newsz;
156
157 newsz = ch->ch_size + sz;
158 if (newsz > ch->ch_size) {
159 ch->ch_size = newsz;
160 return (0);
161 }
162 /* We would overflow -- create new chunk for remainder. */
163 dsz = SIZE_MAX - ch->ch_size;
164 assert(dsz < sz);
165 ch->ch_size = SIZE_MAX;
166 return (sz - dsz);
167 }
168
169 static struct chunk *
image_chunk_memory(struct chunk * ch,lba_t blk)170 image_chunk_memory(struct chunk *ch, lba_t blk)
171 {
172 struct chunk *new;
173 void *ptr;
174
175 ptr = calloc(1, secsz);
176 if (ptr == NULL)
177 return (NULL);
178
179 if (ch->ch_block < blk) {
180 new = malloc(sizeof(*new));
181 if (new == NULL) {
182 free(ptr);
183 return (NULL);
184 }
185 memcpy(new, ch, sizeof(*new));
186 ch->ch_size = (blk - ch->ch_block) * secsz;
187 new->ch_block = blk;
188 new->ch_size -= ch->ch_size;
189 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
190 image_nchunks++;
191 ch = new;
192 }
193
194 if (ch->ch_size > secsz) {
195 new = malloc(sizeof(*new));
196 if (new == NULL) {
197 free(ptr);
198 return (NULL);
199 }
200 memcpy(new, ch, sizeof(*new));
201 ch->ch_size = secsz;
202 new->ch_block++;
203 new->ch_size -= secsz;
204 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
205 image_nchunks++;
206 }
207
208 ch->ch_type = CH_TYPE_MEMORY;
209 ch->ch_u.mem.ptr = ptr;
210 return (ch);
211 }
212
213 static int
image_chunk_skipto(lba_t to)214 image_chunk_skipto(lba_t to)
215 {
216 struct chunk *ch;
217 lba_t from;
218 size_t sz;
219
220 ch = TAILQ_LAST(&image_chunks, chunk_head);
221 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
222
223 assert(from <= to);
224
225 /* Nothing to do? */
226 if (from == to)
227 return (0);
228 /* Avoid bugs due to overflows. */
229 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
230 return (EFBIG);
231 sz = (to - from) * secsz;
232 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
233 sz = image_chunk_grow(ch, sz);
234 if (sz == 0)
235 return (0);
236 from = ch->ch_block + (ch->ch_size / secsz);
237 }
238 ch = malloc(sizeof(*ch));
239 if (ch == NULL)
240 return (ENOMEM);
241 memset(ch, 0, sizeof(*ch));
242 ch->ch_block = from;
243 ch->ch_size = sz;
244 ch->ch_type = CH_TYPE_ZEROES;
245 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
246 image_nchunks++;
247 return (0);
248 }
249
250 static int
image_chunk_append(lba_t blk,size_t sz,off_t ofs,int fd)251 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
252 {
253 struct chunk *ch;
254
255 ch = TAILQ_LAST(&image_chunks, chunk_head);
256 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
257 if (fd == ch->ch_u.file.fd &&
258 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
259 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
260 sz = image_chunk_grow(ch, sz);
261 if (sz == 0)
262 return (0);
263 blk = ch->ch_block + (ch->ch_size / secsz);
264 ofs = ch->ch_u.file.ofs + ch->ch_size;
265 }
266 }
267 ch = malloc(sizeof(*ch));
268 if (ch == NULL)
269 return (ENOMEM);
270 memset(ch, 0, sizeof(*ch));
271 ch->ch_block = blk;
272 ch->ch_size = sz;
273 ch->ch_type = CH_TYPE_FILE;
274 ch->ch_u.file.ofs = ofs;
275 ch->ch_u.file.fd = fd;
276 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
277 image_nchunks++;
278 return (0);
279 }
280
281 static int
image_chunk_copyin(lba_t blk,void * buf,size_t sz,off_t ofs,int fd)282 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
283 {
284 uint8_t *p = buf;
285 int error;
286
287 error = 0;
288 sz = (sz + secsz - 1) & ~(secsz - 1);
289 while (!error && sz > 0) {
290 if (is_empty_sector(p))
291 error = image_chunk_skipto(blk + 1);
292 else
293 error = image_chunk_append(blk, secsz, ofs, fd);
294 blk++;
295 p += secsz;
296 sz -= secsz;
297 ofs += secsz;
298 }
299 return (error);
300 }
301
302 /*
303 * File mapping support.
304 */
305
306 static void *
image_file_map(int fd,off_t ofs,size_t sz,off_t * iofp)307 image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp)
308 {
309 void *ptr;
310 size_t unit;
311 int flags, prot;
312 off_t x;
313
314 /* On Linux anyway ofs must also be page aligned */
315 if ((x = (ofs % image_swap_pgsz)) != 0) {
316 ofs -= x;
317 sz += x;
318 *iofp = x;
319 } else
320 *iofp = 0;
321 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
322 assert((unit & (unit - 1)) == 0);
323
324 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
325 /* Allow writing to our swap file only. */
326 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
327 sz = (sz + unit - 1) & ~(unit - 1);
328 ptr = mmap(NULL, sz, prot, flags, fd, ofs);
329 return ((ptr == MAP_FAILED) ? NULL : ptr);
330 }
331
332 static int
image_file_unmap(void * buffer,size_t sz)333 image_file_unmap(void *buffer, size_t sz)
334 {
335 size_t unit;
336
337 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
338 sz = (sz + unit - 1) & ~(unit - 1);
339 if (madvise(buffer, sz, MADV_DONTNEED) != 0)
340 warn("madvise");
341 munmap(buffer, sz);
342 return (0);
343 }
344
345 /*
346 * Input/source file handling.
347 */
348
349 static int
image_copyin_stream(lba_t blk,int fd,uint64_t * sizep)350 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
351 {
352 char *buffer;
353 uint64_t bytesize;
354 off_t swofs;
355 size_t iosz;
356 ssize_t rdsz;
357 int error;
358 off_t iof;
359
360 /*
361 * This makes sure we're doing I/O in multiples of the page
362 * size as well as of the sector size. 2MB is the minimum
363 * by virtue of secsz at least 512 bytes and the page size
364 * at least 4K bytes.
365 */
366 iosz = secsz * image_swap_pgsz;
367
368 bytesize = 0;
369 do {
370 swofs = image_swap_alloc(iosz);
371 if (swofs == -1LL)
372 return (errno);
373 buffer = image_file_map(image_swap_fd, swofs, iosz, &iof);
374 if (buffer == NULL)
375 return (errno);
376 rdsz = read(fd, &buffer[iof], iosz);
377 if (rdsz > 0)
378 error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs,
379 image_swap_fd);
380 else if (rdsz < 0)
381 error = errno;
382 else
383 error = 0;
384 image_file_unmap(buffer, iosz);
385 /* XXX should we relinguish unused swap space? */
386 if (error)
387 return (error);
388
389 bytesize += rdsz;
390 blk += (rdsz + secsz - 1) / secsz;
391 } while (rdsz > 0);
392
393 if (sizep != NULL)
394 *sizep = bytesize;
395 return (0);
396 }
397
398 static int
image_copyin_mapped(lba_t blk,int fd,uint64_t * sizep)399 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
400 {
401 off_t cur, data, end, hole, pos, iof;
402 void *mp;
403 char *buf;
404 uint64_t bytesize;
405 size_t iosz, sz;
406 int error;
407
408 /*
409 * We'd like to know the size of the file and we must
410 * be able to seek in order to mmap(2). If this isn't
411 * possible, then treat the file as a stream/pipe.
412 */
413 end = lseek(fd, 0L, SEEK_END);
414 if (end == -1L)
415 return (image_copyin_stream(blk, fd, sizep));
416
417 /*
418 * We need the file opened for the duration and our
419 * caller is going to close the file. Make a dup(2)
420 * so that control the faith of the descriptor.
421 */
422 fd = dup(fd);
423 if (fd == -1)
424 return (errno);
425
426 iosz = secsz * image_swap_pgsz;
427
428 bytesize = 0;
429 cur = pos = 0;
430 error = 0;
431 while (!error && cur < end) {
432 hole = lseek(fd, cur, SEEK_HOLE);
433 if (hole == -1)
434 hole = end;
435 data = lseek(fd, cur, SEEK_DATA);
436 if (data == -1)
437 data = end;
438
439 /*
440 * Treat the entire file as data if sparse files
441 * are not supported by the underlying file system.
442 */
443 if (hole == end && data == end)
444 data = cur;
445
446 if (cur == hole && data > hole) {
447 hole = pos;
448 pos = data & ~((uint64_t)secsz - 1);
449
450 blk += (pos - hole) / secsz;
451 error = image_chunk_skipto(blk);
452
453 bytesize += pos - hole;
454 cur = data;
455 } else if (cur == data && hole > data) {
456 data = pos;
457 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
458
459 while (data < pos) {
460 sz = (pos - data > (off_t)iosz)
461 ? iosz : (size_t)(pos - data);
462
463 buf = mp = image_file_map(fd, data, sz, &iof);
464 if (mp != NULL) {
465 buf += iof;
466 error = image_chunk_copyin(blk, buf,
467 sz, data, fd);
468 image_file_unmap(mp, sz);
469 } else
470 error = errno;
471
472 blk += sz / secsz;
473 bytesize += sz;
474 data += sz;
475 }
476 cur = hole;
477 } else {
478 /*
479 * I don't know what this means or whether it
480 * can happen at all...
481 */
482 assert(0);
483 }
484 }
485 if (error)
486 close(fd);
487 if (!error && sizep != NULL)
488 *sizep = bytesize;
489 return (error);
490 }
491
492 int
image_copyin(lba_t blk,int fd,uint64_t * sizep)493 image_copyin(lba_t blk, int fd, uint64_t *sizep)
494 {
495 struct stat sb;
496 int error;
497
498 error = image_chunk_skipto(blk);
499 if (!error) {
500 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
501 error = image_copyin_stream(blk, fd, sizep);
502 else
503 error = image_copyin_mapped(blk, fd, sizep);
504 }
505 return (error);
506 }
507
508 /*
509 * Output/sink file handling.
510 */
511
512 int
image_copyout(int fd)513 image_copyout(int fd)
514 {
515 int error;
516
517 error = image_copyout_region(fd, 0, image_size);
518 if (!error)
519 error = image_copyout_done(fd);
520 return (error);
521 }
522
/*
 * Finish writing to fd by setting the file's length to the current file
 * position.
 *
 * Descriptors that cannot report a position are left alone and count as
 * success.  Returns 0 on success or the errno value of a failed
 * ftruncate(2).
 */
int
image_copyout_done(int fd)
{
	off_t here;

	here = lseek(fd, 0L, SEEK_CUR);
	if (here == -1)
		return (0);
	if (ftruncate(fd, here) == -1)
		return (errno);
	return (0);
}
535
/*
 * Write the size bytes at ptr to fd.
 *
 * write(2) may transfer fewer bytes than requested (for example on
 * pipes), so keep writing until everything has been written.  Writes
 * interrupted by a signal are retried.
 *
 * Returns 0 once all bytes are written, or an errno value.  A write
 * that makes no progress is reported as EIO rather than retried
 * forever.
 */
static int
image_copyout_memory(int fd, size_t size, void *ptr)
{
	const char *p = ptr;
	ssize_t wrsz;

	while (size > 0) {
		wrsz = write(fd, p, size);
		if (wrsz == -1) {
			if (errno == EINTR)
				continue;
			return (errno);
		}
		if (wrsz == 0)
			return (EIO);
		p += wrsz;
		size -= (size_t)wrsz;
	}
	return (0);
}
544
545 int
image_copyout_zeroes(int fd,size_t count)546 image_copyout_zeroes(int fd, size_t count)
547 {
548 static uint8_t *zeroes = NULL;
549 size_t sz;
550 int error;
551
552 if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
553 return (0);
554
555 /*
556 * If we can't seek, we must write.
557 */
558
559 if (zeroes == NULL) {
560 zeroes = calloc(1, secsz);
561 if (zeroes == NULL)
562 return (ENOMEM);
563 }
564
565 while (count > 0) {
566 sz = (count > secsz) ? secsz : count;
567 error = image_copyout_memory(fd, sz, zeroes);
568 if (error)
569 return (error);
570 count -= sz;
571 }
572 return (0);
573 }
574
575 static int
image_copyout_file(int fd,size_t size,int ifd,off_t iofs)576 image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
577 {
578 void *mp;
579 char *buf;
580 size_t iosz, sz;
581 int error;
582 off_t iof;
583
584 iosz = secsz * image_swap_pgsz;
585
586 while (size > 0) {
587 sz = (size > iosz) ? iosz : size;
588 buf = mp = image_file_map(ifd, iofs, sz, &iof);
589 if (buf == NULL)
590 return (errno);
591 buf += iof;
592 error = image_copyout_memory(fd, sz, buf);
593 image_file_unmap(mp, sz);
594 if (error)
595 return (error);
596 size -= sz;
597 iofs += sz;
598 }
599 return (0);
600 }
601
602 int
image_copyout_region(int fd,lba_t blk,lba_t size)603 image_copyout_region(int fd, lba_t blk, lba_t size)
604 {
605 struct chunk *ch;
606 size_t ofs, sz;
607 int error;
608
609 size *= secsz;
610
611 error = 0;
612 while (!error && size > 0) {
613 ch = image_chunk_find(blk);
614 if (ch == NULL) {
615 error = EINVAL;
616 break;
617 }
618 ofs = (blk - ch->ch_block) * secsz;
619 sz = ch->ch_size - ofs;
620 sz = ((lba_t)sz < size) ? sz : (size_t)size;
621 switch (ch->ch_type) {
622 case CH_TYPE_ZEROES:
623 error = image_copyout_zeroes(fd, sz);
624 break;
625 case CH_TYPE_FILE:
626 error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
627 ch->ch_u.file.ofs + ofs);
628 break;
629 case CH_TYPE_MEMORY:
630 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
631 break;
632 default:
633 assert(0);
634 }
635 size -= sz;
636 blk += sz / secsz;
637 }
638 return (error);
639 }
640
641 int
image_data(lba_t blk,lba_t size)642 image_data(lba_t blk, lba_t size)
643 {
644 struct chunk *ch;
645 lba_t lim;
646
647 while (1) {
648 ch = image_chunk_find(blk);
649 if (ch == NULL)
650 return (0);
651 if (ch->ch_type != CH_TYPE_ZEROES)
652 return (1);
653 lim = ch->ch_block + (ch->ch_size / secsz);
654 if (lim >= blk + size)
655 return (0);
656 size -= lim - blk;
657 blk = lim;
658 }
659 /*NOTREACHED*/
660 }
661
662 lba_t
image_get_size(void)663 image_get_size(void)
664 {
665
666 return (image_size);
667 }
668
669 int
image_set_size(lba_t blk)670 image_set_size(lba_t blk)
671 {
672 int error;
673
674 error = image_chunk_skipto(blk);
675 if (!error)
676 image_size = blk;
677 return (error);
678 }
679
680 int
image_write(lba_t blk,void * buf,ssize_t len)681 image_write(lba_t blk, void *buf, ssize_t len)
682 {
683 struct chunk *ch;
684
685 while (len > 0) {
686 if (!is_empty_sector(buf)) {
687 ch = image_chunk_find(blk);
688 if (ch == NULL)
689 return (ENXIO);
690 /* We may not be able to write to files. */
691 if (ch->ch_type == CH_TYPE_FILE)
692 return (EINVAL);
693 if (ch->ch_type == CH_TYPE_ZEROES) {
694 ch = image_chunk_memory(ch, blk);
695 if (ch == NULL)
696 return (ENOMEM);
697 }
698 assert(ch->ch_type == CH_TYPE_MEMORY);
699 memcpy(ch->ch_u.mem.ptr, buf, secsz);
700 }
701 blk++;
702 buf = (char *)buf + secsz;
703 len--;
704 }
705 return (0);
706 }
707
708 static void
image_cleanup(void)709 image_cleanup(void)
710 {
711 struct chunk *ch;
712
713 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) {
714 switch (ch->ch_type) {
715 case CH_TYPE_FILE:
716 /* We may be closing the same file multiple times. */
717 if (ch->ch_u.file.fd != -1)
718 close(ch->ch_u.file.fd);
719 break;
720 case CH_TYPE_MEMORY:
721 free(ch->ch_u.mem.ptr);
722 break;
723 default:
724 break;
725 }
726 TAILQ_REMOVE(&image_chunks, ch, ch_list);
727 free(ch);
728 }
729 if (image_swap_fd != -1)
730 close(image_swap_fd);
731 unlink(image_swap_file);
732 }
733
734 int
image_init(void)735 image_init(void)
736 {
737 const char *tmpdir;
738
739 TAILQ_INIT(&image_chunks);
740 image_nchunks = 0;
741
742 image_swap_size = 0;
743 image_swap_pgsz = getpagesize();
744
745 if (atexit(image_cleanup) == -1)
746 return (errno);
747 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
748 tmpdir = _PATH_TMP;
749 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
750 tmpdir);
751 image_swap_fd = mkstemp(image_swap_file);
752 if (image_swap_fd == -1)
753 return (errno);
754 return (0);
755 }
756