1 /* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004-2026 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #include "gzguts.h"
7 #include <unistd.h>
8
9 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
10 state->fd, and update state->eof, state->err, and state->msg as appropriate.
11 This function needs to loop on read(), since read() is not guaranteed to
12 read the number of bytes requested, depending on the type of descriptor. It
13 also needs to loop to manage the fact that read() returns an int. If the
14 descriptor is non-blocking and read() returns with no data in order to avoid
15 blocking, then gz_load() will return 0 if some data has been read, or -1 if
16 no data has been read. Either way, state->again is set true to indicate a
17 non-blocking event. If errno is non-zero on return, then there was an error
18 signaled from read(). *have is set to the number of bytes read. */
gz_load(gz_statep state,unsigned char * buf,unsigned len,unsigned * have)19 local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
20 unsigned *have) {
21 int ret;
22 unsigned get, max = ((unsigned)-1 >> 2) + 1;
23
24 state->again = 0;
25 errno = 0;
26 *have = 0;
27 do {
28 get = len - *have;
29 if (get > max)
30 get = max;
31 ret = (int)read(state->fd, buf + *have, get);
32 if (ret <= 0)
33 break;
34 *have += (unsigned)ret;
35 } while (*have < len);
36 if (ret < 0) {
37 if (errno == EAGAIN || errno == EWOULDBLOCK) {
38 state->again = 1;
39 if (*have != 0)
40 return 0;
41 }
42 gz_error(state, Z_ERRNO, zstrerror());
43 return -1;
44 }
45 if (ret == 0)
46 state->eof = 1;
47 return 0;
48 }
49
50 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
51 error, 0 otherwise. Note that the eof flag is set when the end of the input
52 file is reached, even though there may be unused data in the buffer. Once
53 that data has been used, no more attempts will be made to read the file.
54 If strm->avail_in != 0, then the current data is moved to the beginning of
55 the input buffer, and then the remainder of the buffer is loaded with the
56 available data from the input file. */
gz_avail(gz_statep state)57 local int gz_avail(gz_statep state) {
58 unsigned got;
59 z_streamp strm = &(state->strm);
60
61 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
62 return -1;
63 if (state->eof == 0) {
64 if (strm->avail_in) { /* copy what's there to the start */
65 unsigned char *p = state->in;
66 unsigned const char *q = strm->next_in;
67
68 if (q != p) {
69 unsigned n = strm->avail_in;
70
71 do {
72 *p++ = *q++;
73 } while (--n);
74 }
75 }
76 if (gz_load(state, state->in + strm->avail_in,
77 state->size - strm->avail_in, &got) == -1)
78 return -1;
79 strm->avail_in += got;
80 strm->next_in = state->in;
81 }
82 return 0;
83 }
84
85 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
86 If this is the first time in, allocate required memory. state->how will be
87 left unchanged if there is no more input data available, will be set to COPY
88 if there is no gzip header and direct copying will be performed, or it will
89 be set to GZIP for decompression. If direct copying, then leftover input
90 data from the input buffer will be copied to the output buffer. In that
91 case, all further file reads will be directly to either the output buffer or
92 a user buffer. If decompressing, the inflate state will be initialized.
93 gz_look() will return 0 on success or -1 on failure. */
gz_look(gz_statep state)94 local int gz_look(gz_statep state) {
95 z_streamp strm = &(state->strm);
96
97 /* allocate read buffers and inflate memory */
98 if (state->size == 0) {
99 /* allocate buffers */
100 state->in = (unsigned char *)malloc(state->want);
101 state->out = (unsigned char *)malloc(state->want << 1);
102 if (state->in == NULL || state->out == NULL) {
103 free(state->out);
104 free(state->in);
105 gz_error(state, Z_MEM_ERROR, "out of memory");
106 return -1;
107 }
108 state->size = state->want;
109
110 /* allocate inflate memory */
111 state->strm.zalloc = Z_NULL;
112 state->strm.zfree = Z_NULL;
113 state->strm.opaque = Z_NULL;
114 state->strm.avail_in = 0;
115 state->strm.next_in = Z_NULL;
116 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
117 free(state->out);
118 free(state->in);
119 state->size = 0;
120 gz_error(state, Z_MEM_ERROR, "out of memory");
121 return -1;
122 }
123 }
124
125 /* if transparent reading is disabled, which would only be at the start, or
126 if we're looking for a gzip member after the first one, which is not at
127 the start, then proceed directly to look for a gzip member next */
128 if (state->direct == -1 || state->junk == 0) {
129 inflateReset(strm);
130 state->how = GZIP;
131 state->junk = state->junk != -1;
132 state->direct = 0;
133 return 0;
134 }
135
136 /* otherwise we're at the start with auto-detect -- we check to see if the
137 first four bytes could be gzip header in order to decide whether or not
138 this will be a transparent read */
139
140 /* load any header bytes into the input buffer -- if the input is empty,
141 then it's not an error as this is a transparent read of zero bytes */
142 if (gz_avail(state) == -1)
143 return -1;
144 if (strm->avail_in == 0 || (state->again && strm->avail_in < 4))
145 /* if non-blocking input stalled before getting four bytes, then
146 return and wait until a later call has accumulated enough */
147 return 0;
148
149 /* see if this is (likely) gzip input -- if the first four bytes are
150 consistent with a gzip header, then go look for the first gzip member,
151 otherwise proceed to copy the input transparently */
152 if (strm->avail_in > 3 &&
153 strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
154 strm->next_in[2] == 8 && strm->next_in[3] < 32) {
155 inflateReset(strm);
156 state->how = GZIP;
157 state->junk = 1;
158 state->direct = 0;
159 return 0;
160 }
161
162 /* doing raw i/o: copy any leftover input to output -- this assumes that
163 the output buffer is larger than the input buffer, which also assures
164 space for gzungetc() */
165 state->x.next = state->out;
166 memcpy(state->x.next, strm->next_in, strm->avail_in);
167 state->x.have = strm->avail_in;
168 strm->avail_in = 0;
169 state->how = COPY;
170 return 0;
171 }
172
173 /* Decompress from input to the provided next_out and avail_out in the state.
174 On return, state->x.have and state->x.next point to the just decompressed
175 data. If the gzip stream completes, state->how is reset to LOOK to look for
176 the next gzip stream or raw data, once state->x.have is depleted. Returns 0
177 on success, -1 on failure. If EOF is reached when looking for more input to
178 complete the gzip member, then an unexpected end of file error is raised.
179 If there is no more input, but state->again is true, then EOF has not been
180 reached, and no error is raised. */
gz_decomp(gz_statep state)181 local int gz_decomp(gz_statep state) {
182 int ret = Z_OK;
183 unsigned had;
184 z_streamp strm = &(state->strm);
185
186 /* fill output buffer up to end of deflate stream */
187 had = strm->avail_out;
188 do {
189 /* get more input for inflate() */
190 if (strm->avail_in == 0 && gz_avail(state) == -1) {
191 ret = state->err;
192 break;
193 }
194 if (strm->avail_in == 0) {
195 if (!state->again)
196 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
197 break;
198 }
199
200 /* decompress and handle errors */
201 ret = inflate(strm, Z_NO_FLUSH);
202 if (strm->avail_out < had)
203 /* any decompressed data marks this as a real gzip stream */
204 state->junk = 0;
205 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
206 gz_error(state, Z_STREAM_ERROR,
207 "internal error: inflate stream corrupt");
208 break;
209 }
210 if (ret == Z_MEM_ERROR) {
211 gz_error(state, Z_MEM_ERROR, "out of memory");
212 break;
213 }
214 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
215 if (state->junk == 1) { /* trailing garbage is ok */
216 strm->avail_in = 0;
217 state->eof = 1;
218 state->how = LOOK;
219 ret = Z_OK;
220 break;
221 }
222 gz_error(state, Z_DATA_ERROR,
223 strm->msg == NULL ? "compressed data error" : strm->msg);
224 break;
225 }
226 } while (strm->avail_out && ret != Z_STREAM_END);
227
228 /* update available output */
229 state->x.have = had - strm->avail_out;
230 state->x.next = strm->next_out - state->x.have;
231
232 /* if the gzip stream completed successfully, look for another */
233 if (ret == Z_STREAM_END) {
234 state->junk = 0;
235 state->how = LOOK;
236 return 0;
237 }
238
239 /* return decompression status */
240 return ret != Z_OK ? -1 : 0;
241 }
242
243 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
244 Data is either copied from the input file or decompressed from the input
245 file depending on state->how. If state->how is LOOK, then a gzip header is
246 looked for to determine whether to copy or decompress. Returns -1 on error,
247 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
248 end of the input file has been reached and all data has been processed. */
gz_fetch(gz_statep state)249 local int gz_fetch(gz_statep state) {
250 z_streamp strm = &(state->strm);
251
252 do {
253 switch(state->how) {
254 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
255 if (gz_look(state) == -1)
256 return -1;
257 if (state->how == LOOK)
258 return 0;
259 break;
260 case COPY: /* -> COPY */
261 if (gz_load(state, state->out, state->size << 1, &(state->x.have))
262 == -1)
263 return -1;
264 state->x.next = state->out;
265 return 0;
266 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
267 strm->avail_out = state->size << 1;
268 strm->next_out = state->out;
269 if (gz_decomp(state) == -1)
270 return -1;
271 break;
272 default:
273 gz_error(state, Z_STREAM_ERROR, "state corrupt");
274 return -1;
275 }
276 } while (state->x.have == 0 && (!state->eof || strm->avail_in));
277 return 0;
278 }
279
280 /* Skip state->skip (> 0) uncompressed bytes of output. Return -1 on error, 0
281 on success. */
gz_skip(gz_statep state)282 local int gz_skip(gz_statep state) {
283 unsigned n;
284
285 /* skip over len bytes or reach end-of-file, whichever comes first */
286 do {
287 /* skip over whatever is in output buffer */
288 if (state->x.have) {
289 n = GT_OFF(state->x.have) ||
290 (z_off64_t)state->x.have > state->skip ?
291 (unsigned)state->skip : state->x.have;
292 state->x.have -= n;
293 state->x.next += n;
294 state->x.pos += n;
295 state->skip -= n;
296 }
297
298 /* output buffer empty -- return if we're at the end of the input */
299 else if (state->eof && state->strm.avail_in == 0)
300 break;
301
302 /* need more data to skip -- load up output buffer */
303 else {
304 /* get more output, looking for header if required */
305 if (gz_fetch(state) == -1)
306 return -1;
307 }
308 } while (state->skip);
309 return 0;
310 }
311
312 /* Read len bytes into buf from file, or less than len up to the end of the
313 input. Return the number of bytes read. If zero is returned, either the end
314 of file was reached, or there was an error. state->err must be consulted in
315 that case to determine which. If there was an error, but some uncompressed
316 bytes were read before the error, then that count is returned. The error is
317 still recorded, and so is deferred until the next call. */
gz_read(gz_statep state,voidp buf,z_size_t len)318 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
319 z_size_t got;
320 unsigned n;
321 int err;
322
323 /* if len is zero, avoid unnecessary operations */
324 if (len == 0)
325 return 0;
326
327 /* process a skip request */
328 if (state->skip && gz_skip(state) == -1)
329 return 0;
330
331 /* get len bytes to buf, or less than len if at the end */
332 got = 0;
333 err = 0;
334 do {
335 /* set n to the maximum amount of len that fits in an unsigned int */
336 n = (unsigned)-1;
337 if (n > len)
338 n = (unsigned)len;
339
340 /* first just try copying data from the output buffer */
341 if (state->x.have) {
342 if (state->x.have < n)
343 n = state->x.have;
344 memcpy(buf, state->x.next, n);
345 state->x.next += n;
346 state->x.have -= n;
347 if (state->err != Z_OK)
348 /* caught deferred error from gz_fetch() */
349 err = -1;
350 }
351
352 /* output buffer empty -- return if we're at the end of the input */
353 else if (state->eof && state->strm.avail_in == 0)
354 break;
355
356 /* need output data -- for small len or new stream load up our output
357 buffer, so that gzgetc() can be fast */
358 else if (state->how == LOOK || n < (state->size << 1)) {
359 /* get more output, looking for header if required */
360 if (gz_fetch(state) == -1 && state->x.have == 0)
361 /* if state->x.have != 0, error will be caught after copy */
362 err = -1;
363 continue; /* no progress yet -- go back to copy above */
364 /* the copy above assures that we will leave with space in the
365 output buffer, allowing at least one gzungetc() to succeed */
366 }
367
368 /* large len -- read directly into user buffer */
369 else if (state->how == COPY) /* read directly */
370 err = gz_load(state, (unsigned char *)buf, n, &n);
371
372 /* large len -- decompress directly into user buffer */
373 else { /* state->how == GZIP */
374 state->strm.avail_out = n;
375 state->strm.next_out = (unsigned char *)buf;
376 err = gz_decomp(state);
377 n = state->x.have;
378 state->x.have = 0;
379 }
380
381 /* update progress */
382 len -= n;
383 buf = (char *)buf + n;
384 got += n;
385 state->x.pos += n;
386 } while (len && !err);
387
388 /* note read past eof */
389 if (len && state->eof)
390 state->past = 1;
391
392 /* return number of bytes read into user buffer */
393 return got;
394 }
395
396 /* -- see zlib.h -- */
gzread(gzFile file,voidp buf,unsigned len)397 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
398 gz_statep state;
399
400 /* get internal structure and check that it's for reading */
401 if (file == NULL)
402 return -1;
403 state = (gz_statep)file;
404 if (state->mode != GZ_READ)
405 return -1;
406
407 /* check that there was no (serious) error */
408 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
409 return -1;
410 gz_error(state, Z_OK, NULL);
411
412 /* since an int is returned, make sure len fits in one, otherwise return
413 with an error (this avoids a flaw in the interface) */
414 if ((int)len < 0) {
415 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
416 return -1;
417 }
418
419 /* read len or fewer bytes to buf */
420 len = (unsigned)gz_read(state, buf, len);
421
422 /* check for an error */
423 if (len == 0) {
424 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
425 return -1;
426 if (state->again) {
427 /* non-blocking input stalled after some input was read, but no
428 uncompressed bytes were produced -- let the application know
429 this isn't EOF */
430 gz_error(state, Z_ERRNO, zstrerror());
431 return -1;
432 }
433 }
434
435 /* return the number of bytes read */
436 return (int)len;
437 }
438
439 /* -- see zlib.h -- */
gzfread(voidp buf,z_size_t size,z_size_t nitems,gzFile file)440 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
441 gzFile file) {
442 z_size_t len;
443 gz_statep state;
444
445 /* get internal structure and check that it's for reading */
446 if (file == NULL)
447 return 0;
448 state = (gz_statep)file;
449 if (state->mode != GZ_READ)
450 return 0;
451
452 /* check that there was no (serious) error */
453 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
454 return 0;
455 gz_error(state, Z_OK, NULL);
456
457 /* compute bytes to read -- error on overflow */
458 len = nitems * size;
459 if (size && len / size != nitems) {
460 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
461 return 0;
462 }
463
464 /* read len or fewer bytes to buf, return the number of full items read */
465 return len ? gz_read(state, buf, len) / size : 0;
466 }
467
468 /* -- see zlib.h -- */
469 #ifdef Z_PREFIX_SET
470 # undef z_gzgetc
471 #else
472 # undef gzgetc
473 #endif
gzgetc(gzFile file)474 int ZEXPORT gzgetc(gzFile file) {
475 unsigned char buf[1];
476 gz_statep state;
477
478 /* get internal structure and check that it's for reading */
479 if (file == NULL)
480 return -1;
481 state = (gz_statep)file;
482 if (state->mode != GZ_READ)
483 return -1;
484
485 /* check that there was no (serious) error */
486 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
487 return -1;
488 gz_error(state, Z_OK, NULL);
489
490 /* try output buffer (no need to check for skip request) */
491 if (state->x.have) {
492 state->x.have--;
493 state->x.pos++;
494 return *(state->x.next)++;
495 }
496
497 /* nothing there -- try gz_read() */
498 return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
499 }
500
gzgetc_(gzFile file)501 int ZEXPORT gzgetc_(gzFile file) {
502 return gzgetc(file);
503 }
504
505 /* -- see zlib.h -- */
gzungetc(int c,gzFile file)506 int ZEXPORT gzungetc(int c, gzFile file) {
507 gz_statep state;
508
509 /* get internal structure and check that it's for reading */
510 if (file == NULL)
511 return -1;
512 state = (gz_statep)file;
513 if (state->mode != GZ_READ)
514 return -1;
515
516 /* in case this was just opened, set up the input buffer */
517 if (state->how == LOOK && state->x.have == 0)
518 (void)gz_look(state);
519
520 /* check that there was no (serious) error */
521 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
522 return -1;
523 gz_error(state, Z_OK, NULL);
524
525 /* process a skip request */
526 if (state->skip && gz_skip(state) == -1)
527 return -1;
528
529 /* can't push EOF */
530 if (c < 0)
531 return -1;
532
533 /* if output buffer empty, put byte at end (allows more pushing) */
534 if (state->x.have == 0) {
535 state->x.have = 1;
536 state->x.next = state->out + (state->size << 1) - 1;
537 state->x.next[0] = (unsigned char)c;
538 state->x.pos--;
539 state->past = 0;
540 return c;
541 }
542
543 /* if no room, give up (must have already done a gzungetc()) */
544 if (state->x.have == (state->size << 1)) {
545 gz_error(state, Z_DATA_ERROR, "out of room to push characters");
546 return -1;
547 }
548
549 /* slide output data if needed and insert byte before existing data */
550 if (state->x.next == state->out) {
551 unsigned char *src = state->out + state->x.have;
552 unsigned char *dest = state->out + (state->size << 1);
553
554 while (src > state->out)
555 *--dest = *--src;
556 state->x.next = dest;
557 }
558 state->x.have++;
559 state->x.next--;
560 state->x.next[0] = (unsigned char)c;
561 state->x.pos--;
562 state->past = 0;
563 return c;
564 }
565
566 /* -- see zlib.h -- */
gzgets(gzFile file,char * buf,int len)567 char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
568 unsigned left, n;
569 char *str;
570 unsigned char *eol;
571 gz_statep state;
572
573 /* check parameters, get internal structure, and check that it's for
574 reading */
575 if (file == NULL || buf == NULL || len < 1)
576 return NULL;
577 state = (gz_statep)file;
578 if (state->mode != GZ_READ)
579 return NULL;
580
581 /* check that there was no (serious) error */
582 if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
583 return NULL;
584 gz_error(state, Z_OK, NULL);
585
586 /* process a skip request */
587 if (state->skip && gz_skip(state) == -1)
588 return NULL;
589
590 /* copy output up to a new line, len-1 bytes, or there is no more output,
591 whichever comes first */
592 str = buf;
593 left = (unsigned)len - 1;
594 if (left) do {
595 /* assure that something is in the output buffer */
596 if (state->x.have == 0 && gz_fetch(state) == -1)
597 break; /* error */
598 if (state->x.have == 0) { /* end of file */
599 state->past = 1; /* read past end */
600 break; /* return what we have */
601 }
602
603 /* look for end-of-line in current output buffer */
604 n = state->x.have > left ? left : state->x.have;
605 eol = (unsigned char *)memchr(state->x.next, '\n', n);
606 if (eol != NULL)
607 n = (unsigned)(eol - state->x.next) + 1;
608
609 /* copy through end-of-line, or remainder if not found */
610 memcpy(buf, state->x.next, n);
611 state->x.have -= n;
612 state->x.next += n;
613 state->x.pos += n;
614 left -= n;
615 buf += n;
616 } while (left && eol == NULL);
617
618 /* append a terminating zero to the string (we don't check for a zero in
619 the contents, let the user worry about that) -- return the terminated
620 string, or if nothing was read, NULL */
621 if (buf == str)
622 return NULL;
623 buf[0] = 0;
624 return str;
625 }
626
627 /* -- see zlib.h -- */
gzdirect(gzFile file)628 int ZEXPORT gzdirect(gzFile file) {
629 gz_statep state;
630
631 /* get internal structure */
632 if (file == NULL)
633 return 0;
634 state = (gz_statep)file;
635
636 /* if the state is not known, but we can find out, then do so (this is
637 mainly for right after a gzopen() or gzdopen()) */
638 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
639 (void)gz_look(state);
640
641 /* return 1 if transparent, 0 if processing a gzip stream */
642 return state->direct == 1;
643 }
644
645 /* -- see zlib.h -- */
gzclose_r(gzFile file)646 int ZEXPORT gzclose_r(gzFile file) {
647 int ret, err;
648 gz_statep state;
649
650 /* get internal structure and check that it's for reading */
651 if (file == NULL)
652 return Z_STREAM_ERROR;
653 state = (gz_statep)file;
654 if (state->mode != GZ_READ)
655 return Z_STREAM_ERROR;
656
657 /* free memory and close file */
658 if (state->size) {
659 inflateEnd(&(state->strm));
660 free(state->out);
661 free(state->in);
662 }
663 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
664 gz_error(state, Z_OK, NULL);
665 free(state->path);
666 ret = close(state->fd);
667 free(state);
668 return ret ? Z_ERRNO : err;
669 }
670