xref: /src/sys/contrib/zlib/gzread.c (revision 7aa1dba6b00ccfb7d66627badc8a7aaa06b02946)
1 /* gzread.c -- zlib functions for reading gzip files
2  * Copyright (C) 2004-2026 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #include "gzguts.h"
7 #include <unistd.h>
8 
9 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
10    state->fd, and update state->eof, state->err, and state->msg as appropriate.
11    This function needs to loop on read(), since read() is not guaranteed to
12    read the number of bytes requested, depending on the type of descriptor. It
13    also needs to loop to manage the fact that read() returns an int. If the
14    descriptor is non-blocking and read() returns with no data in order to avoid
15    blocking, then gz_load() will return 0 if some data has been read, or -1 if
16    no data has been read. Either way, state->again is set true to indicate a
17    non-blocking event. If errno is non-zero on return, then there was an error
18    signaled from read().  *have is set to the number of bytes read. */
gz_load(gz_statep state,unsigned char * buf,unsigned len,unsigned * have)19 local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
20                   unsigned *have) {
21     int ret;
22     unsigned get, max = ((unsigned)-1 >> 2) + 1;
23 
24     state->again = 0;
25     errno = 0;
26     *have = 0;
27     do {
28         get = len - *have;
29         if (get > max)
30             get = max;
31         ret = (int)read(state->fd, buf + *have, get);
32         if (ret <= 0)
33             break;
34         *have += (unsigned)ret;
35     } while (*have < len);
36     if (ret < 0) {
37         if (errno == EAGAIN || errno == EWOULDBLOCK) {
38             state->again = 1;
39             if (*have != 0)
40                 return 0;
41         }
42         gz_error(state, Z_ERRNO, zstrerror());
43         return -1;
44     }
45     if (ret == 0)
46         state->eof = 1;
47     return 0;
48 }
49 
50 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
51    error, 0 otherwise.  Note that the eof flag is set when the end of the input
52    file is reached, even though there may be unused data in the buffer.  Once
53    that data has been used, no more attempts will be made to read the file.
54    If strm->avail_in != 0, then the current data is moved to the beginning of
55    the input buffer, and then the remainder of the buffer is loaded with the
56    available data from the input file. */
gz_avail(gz_statep state)57 local int gz_avail(gz_statep state) {
58     unsigned got;
59     z_streamp strm = &(state->strm);
60 
61     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
62         return -1;
63     if (state->eof == 0) {
64         if (strm->avail_in) {       /* copy what's there to the start */
65             unsigned char *p = state->in;
66             unsigned const char *q = strm->next_in;
67 
68             if (q != p) {
69                 unsigned n = strm->avail_in;
70 
71                 do {
72                     *p++ = *q++;
73                 } while (--n);
74             }
75         }
76         if (gz_load(state, state->in + strm->avail_in,
77                     state->size - strm->avail_in, &got) == -1)
78             return -1;
79         strm->avail_in += got;
80         strm->next_in = state->in;
81     }
82     return 0;
83 }
84 
85 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
86    If this is the first time in, allocate required memory.  state->how will be
87    left unchanged if there is no more input data available, will be set to COPY
88    if there is no gzip header and direct copying will be performed, or it will
89    be set to GZIP for decompression.  If direct copying, then leftover input
90    data from the input buffer will be copied to the output buffer.  In that
91    case, all further file reads will be directly to either the output buffer or
92    a user buffer.  If decompressing, the inflate state will be initialized.
93    gz_look() will return 0 on success or -1 on failure. */
gz_look(gz_statep state)94 local int gz_look(gz_statep state) {
95     z_streamp strm = &(state->strm);
96 
97     /* allocate read buffers and inflate memory */
98     if (state->size == 0) {
99         /* allocate buffers */
100         state->in = (unsigned char *)malloc(state->want);
101         state->out = (unsigned char *)malloc(state->want << 1);
102         if (state->in == NULL || state->out == NULL) {
103             free(state->out);
104             free(state->in);
105             gz_error(state, Z_MEM_ERROR, "out of memory");
106             return -1;
107         }
108         state->size = state->want;
109 
110         /* allocate inflate memory */
111         state->strm.zalloc = Z_NULL;
112         state->strm.zfree = Z_NULL;
113         state->strm.opaque = Z_NULL;
114         state->strm.avail_in = 0;
115         state->strm.next_in = Z_NULL;
116         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
117             free(state->out);
118             free(state->in);
119             state->size = 0;
120             gz_error(state, Z_MEM_ERROR, "out of memory");
121             return -1;
122         }
123     }
124 
125     /* if transparent reading is disabled, which would only be at the start, or
126        if we're looking for a gzip member after the first one, which is not at
127        the start, then proceed directly to look for a gzip member next */
128     if (state->direct == -1 || state->junk == 0) {
129         inflateReset(strm);
130         state->how = GZIP;
131         state->junk = state->junk != -1;
132         state->direct = 0;
133         return 0;
134     }
135 
136     /* otherwise we're at the start with auto-detect -- we check to see if the
137        first four bytes could be gzip header in order to decide whether or not
138        this will be a transparent read */
139 
140     /* load any header bytes into the input buffer -- if the input is empty,
141        then it's not an error as this is a transparent read of zero bytes */
142     if (gz_avail(state) == -1)
143         return -1;
144     if (strm->avail_in == 0 || (state->again && strm->avail_in < 4))
145         /* if non-blocking input stalled before getting four bytes, then
146            return and wait until a later call has accumulated enough */
147         return 0;
148 
149     /* see if this is (likely) gzip input -- if the first four bytes are
150        consistent with a gzip header, then go look for the first gzip member,
151        otherwise proceed to copy the input transparently */
152     if (strm->avail_in > 3 &&
153             strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
154             strm->next_in[2] == 8 && strm->next_in[3] < 32) {
155         inflateReset(strm);
156         state->how = GZIP;
157         state->junk = 1;
158         state->direct = 0;
159         return 0;
160     }
161 
162     /* doing raw i/o: copy any leftover input to output -- this assumes that
163        the output buffer is larger than the input buffer, which also assures
164        space for gzungetc() */
165     state->x.next = state->out;
166     memcpy(state->x.next, strm->next_in, strm->avail_in);
167     state->x.have = strm->avail_in;
168     strm->avail_in = 0;
169     state->how = COPY;
170     return 0;
171 }
172 
173 /* Decompress from input to the provided next_out and avail_out in the state.
174    On return, state->x.have and state->x.next point to the just decompressed
175    data. If the gzip stream completes, state->how is reset to LOOK to look for
176    the next gzip stream or raw data, once state->x.have is depleted. Returns 0
177    on success, -1 on failure. If EOF is reached when looking for more input to
178    complete the gzip member, then an unexpected end of file error is raised.
179    If there is no more input, but state->again is true, then EOF has not been
180    reached, and no error is raised. */
gz_decomp(gz_statep state)181 local int gz_decomp(gz_statep state) {
182     int ret = Z_OK;
183     unsigned had;
184     z_streamp strm = &(state->strm);
185 
186     /* fill output buffer up to end of deflate stream */
187     had = strm->avail_out;
188     do {
189         /* get more input for inflate() */
190         if (strm->avail_in == 0 && gz_avail(state) == -1) {
191             ret = state->err;
192             break;
193         }
194         if (strm->avail_in == 0) {
195             if (!state->again)
196                 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
197             break;
198         }
199 
200         /* decompress and handle errors */
201         ret = inflate(strm, Z_NO_FLUSH);
202         if (strm->avail_out < had)
203             /* any decompressed data marks this as a real gzip stream */
204             state->junk = 0;
205         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
206             gz_error(state, Z_STREAM_ERROR,
207                      "internal error: inflate stream corrupt");
208             break;
209         }
210         if (ret == Z_MEM_ERROR) {
211             gz_error(state, Z_MEM_ERROR, "out of memory");
212             break;
213         }
214         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
215             if (state->junk == 1) {             /* trailing garbage is ok */
216                 strm->avail_in = 0;
217                 state->eof = 1;
218                 state->how = LOOK;
219                 ret = Z_OK;
220                 break;
221             }
222             gz_error(state, Z_DATA_ERROR,
223                      strm->msg == NULL ? "compressed data error" : strm->msg);
224             break;
225         }
226     } while (strm->avail_out && ret != Z_STREAM_END);
227 
228     /* update available output */
229     state->x.have = had - strm->avail_out;
230     state->x.next = strm->next_out - state->x.have;
231 
232     /* if the gzip stream completed successfully, look for another */
233     if (ret == Z_STREAM_END) {
234         state->junk = 0;
235         state->how = LOOK;
236         return 0;
237     }
238 
239     /* return decompression status */
240     return ret != Z_OK ? -1 : 0;
241 }
242 
243 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
244    Data is either copied from the input file or decompressed from the input
245    file depending on state->how.  If state->how is LOOK, then a gzip header is
246    looked for to determine whether to copy or decompress.  Returns -1 on error,
247    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
248    end of the input file has been reached and all data has been processed.  */
gz_fetch(gz_statep state)249 local int gz_fetch(gz_statep state) {
250     z_streamp strm = &(state->strm);
251 
252     do {
253         switch(state->how) {
254         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
255             if (gz_look(state) == -1)
256                 return -1;
257             if (state->how == LOOK)
258                 return 0;
259             break;
260         case COPY:      /* -> COPY */
261             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
262                     == -1)
263                 return -1;
264             state->x.next = state->out;
265             return 0;
266         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
267             strm->avail_out = state->size << 1;
268             strm->next_out = state->out;
269             if (gz_decomp(state) == -1)
270                 return -1;
271             break;
272         default:
273             gz_error(state, Z_STREAM_ERROR, "state corrupt");
274             return -1;
275         }
276     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
277     return 0;
278 }
279 
280 /* Skip state->skip (> 0) uncompressed bytes of output.  Return -1 on error, 0
281    on success. */
gz_skip(gz_statep state)282 local int gz_skip(gz_statep state) {
283     unsigned n;
284 
285     /* skip over len bytes or reach end-of-file, whichever comes first */
286     do {
287         /* skip over whatever is in output buffer */
288         if (state->x.have) {
289             n = GT_OFF(state->x.have) ||
290                 (z_off64_t)state->x.have > state->skip ?
291                 (unsigned)state->skip : state->x.have;
292             state->x.have -= n;
293             state->x.next += n;
294             state->x.pos += n;
295             state->skip -= n;
296         }
297 
298         /* output buffer empty -- return if we're at the end of the input */
299         else if (state->eof && state->strm.avail_in == 0)
300             break;
301 
302         /* need more data to skip -- load up output buffer */
303         else {
304             /* get more output, looking for header if required */
305             if (gz_fetch(state) == -1)
306                 return -1;
307         }
308     } while (state->skip);
309     return 0;
310 }
311 
312 /* Read len bytes into buf from file, or less than len up to the end of the
313    input. Return the number of bytes read. If zero is returned, either the end
314    of file was reached, or there was an error. state->err must be consulted in
315    that case to determine which. If there was an error, but some uncompressed
316    bytes were read before the error, then that count is returned. The error is
317    still recorded, and so is deferred until the next call. */
gz_read(gz_statep state,voidp buf,z_size_t len)318 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
319     z_size_t got;
320     unsigned n;
321     int err;
322 
323     /* if len is zero, avoid unnecessary operations */
324     if (len == 0)
325         return 0;
326 
327     /* process a skip request */
328     if (state->skip && gz_skip(state) == -1)
329         return 0;
330 
331     /* get len bytes to buf, or less than len if at the end */
332     got = 0;
333     err = 0;
334     do {
335         /* set n to the maximum amount of len that fits in an unsigned int */
336         n = (unsigned)-1;
337         if (n > len)
338             n = (unsigned)len;
339 
340         /* first just try copying data from the output buffer */
341         if (state->x.have) {
342             if (state->x.have < n)
343                 n = state->x.have;
344             memcpy(buf, state->x.next, n);
345             state->x.next += n;
346             state->x.have -= n;
347             if (state->err != Z_OK)
348                 /* caught deferred error from gz_fetch() */
349                 err = -1;
350         }
351 
352         /* output buffer empty -- return if we're at the end of the input */
353         else if (state->eof && state->strm.avail_in == 0)
354             break;
355 
356         /* need output data -- for small len or new stream load up our output
357            buffer, so that gzgetc() can be fast */
358         else if (state->how == LOOK || n < (state->size << 1)) {
359             /* get more output, looking for header if required */
360             if (gz_fetch(state) == -1 && state->x.have == 0)
361                 /* if state->x.have != 0, error will be caught after copy */
362                 err = -1;
363             continue;       /* no progress yet -- go back to copy above */
364             /* the copy above assures that we will leave with space in the
365                output buffer, allowing at least one gzungetc() to succeed */
366         }
367 
368         /* large len -- read directly into user buffer */
369         else if (state->how == COPY)        /* read directly */
370             err = gz_load(state, (unsigned char *)buf, n, &n);
371 
372         /* large len -- decompress directly into user buffer */
373         else {  /* state->how == GZIP */
374             state->strm.avail_out = n;
375             state->strm.next_out = (unsigned char *)buf;
376             err = gz_decomp(state);
377             n = state->x.have;
378             state->x.have = 0;
379         }
380 
381         /* update progress */
382         len -= n;
383         buf = (char *)buf + n;
384         got += n;
385         state->x.pos += n;
386     } while (len && !err);
387 
388     /* note read past eof */
389     if (len && state->eof)
390         state->past = 1;
391 
392     /* return number of bytes read into user buffer */
393     return got;
394 }
395 
396 /* -- see zlib.h -- */
gzread(gzFile file,voidp buf,unsigned len)397 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
398     gz_statep state;
399 
400     /* get internal structure and check that it's for reading */
401     if (file == NULL)
402         return -1;
403     state = (gz_statep)file;
404     if (state->mode != GZ_READ)
405         return -1;
406 
407     /* check that there was no (serious) error */
408     if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
409         return -1;
410     gz_error(state, Z_OK, NULL);
411 
412     /* since an int is returned, make sure len fits in one, otherwise return
413        with an error (this avoids a flaw in the interface) */
414     if ((int)len < 0) {
415         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
416         return -1;
417     }
418 
419     /* read len or fewer bytes to buf */
420     len = (unsigned)gz_read(state, buf, len);
421 
422     /* check for an error */
423     if (len == 0) {
424         if (state->err != Z_OK && state->err != Z_BUF_ERROR)
425             return -1;
426         if (state->again) {
427             /* non-blocking input stalled after some input was read, but no
428                uncompressed bytes were produced -- let the application know
429                this isn't EOF */
430             gz_error(state, Z_ERRNO, zstrerror());
431             return -1;
432         }
433     }
434 
435     /* return the number of bytes read */
436     return (int)len;
437 }
438 
439 /* -- see zlib.h -- */
gzfread(voidp buf,z_size_t size,z_size_t nitems,gzFile file)440 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
441                          gzFile file) {
442     z_size_t len;
443     gz_statep state;
444 
445     /* get internal structure and check that it's for reading */
446     if (file == NULL)
447         return 0;
448     state = (gz_statep)file;
449     if (state->mode != GZ_READ)
450         return 0;
451 
452     /* check that there was no (serious) error */
453     if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
454         return 0;
455     gz_error(state, Z_OK, NULL);
456 
457     /* compute bytes to read -- error on overflow */
458     len = nitems * size;
459     if (size && len / size != nitems) {
460         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
461         return 0;
462     }
463 
464     /* read len or fewer bytes to buf, return the number of full items read */
465     return len ? gz_read(state, buf, len) / size : 0;
466 }
467 
468 /* -- see zlib.h -- */
469 #ifdef Z_PREFIX_SET
470 #  undef z_gzgetc
471 #else
472 #  undef gzgetc
473 #endif
gzgetc(gzFile file)474 int ZEXPORT gzgetc(gzFile file) {
475     unsigned char buf[1];
476     gz_statep state;
477 
478     /* get internal structure and check that it's for reading */
479     if (file == NULL)
480         return -1;
481     state = (gz_statep)file;
482     if (state->mode != GZ_READ)
483         return -1;
484 
485     /* check that there was no (serious) error */
486     if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
487         return -1;
488     gz_error(state, Z_OK, NULL);
489 
490     /* try output buffer (no need to check for skip request) */
491     if (state->x.have) {
492         state->x.have--;
493         state->x.pos++;
494         return *(state->x.next)++;
495     }
496 
497     /* nothing there -- try gz_read() */
498     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
499 }
500 
gzgetc_(gzFile file)501 int ZEXPORT gzgetc_(gzFile file) {
502     return gzgetc(file);
503 }
504 
505 /* -- see zlib.h -- */
gzungetc(int c,gzFile file)506 int ZEXPORT gzungetc(int c, gzFile file) {
507     gz_statep state;
508 
509     /* get internal structure and check that it's for reading */
510     if (file == NULL)
511         return -1;
512     state = (gz_statep)file;
513     if (state->mode != GZ_READ)
514         return -1;
515 
516     /* in case this was just opened, set up the input buffer */
517     if (state->how == LOOK && state->x.have == 0)
518         (void)gz_look(state);
519 
520     /* check that there was no (serious) error */
521     if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
522         return -1;
523     gz_error(state, Z_OK, NULL);
524 
525     /* process a skip request */
526     if (state->skip && gz_skip(state) == -1)
527         return -1;
528 
529     /* can't push EOF */
530     if (c < 0)
531         return -1;
532 
533     /* if output buffer empty, put byte at end (allows more pushing) */
534     if (state->x.have == 0) {
535         state->x.have = 1;
536         state->x.next = state->out + (state->size << 1) - 1;
537         state->x.next[0] = (unsigned char)c;
538         state->x.pos--;
539         state->past = 0;
540         return c;
541     }
542 
543     /* if no room, give up (must have already done a gzungetc()) */
544     if (state->x.have == (state->size << 1)) {
545         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
546         return -1;
547     }
548 
549     /* slide output data if needed and insert byte before existing data */
550     if (state->x.next == state->out) {
551         unsigned char *src = state->out + state->x.have;
552         unsigned char *dest = state->out + (state->size << 1);
553 
554         while (src > state->out)
555             *--dest = *--src;
556         state->x.next = dest;
557     }
558     state->x.have++;
559     state->x.next--;
560     state->x.next[0] = (unsigned char)c;
561     state->x.pos--;
562     state->past = 0;
563     return c;
564 }
565 
566 /* -- see zlib.h -- */
gzgets(gzFile file,char * buf,int len)567 char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
568     unsigned left, n;
569     char *str;
570     unsigned char *eol;
571     gz_statep state;
572 
573     /* check parameters, get internal structure, and check that it's for
574        reading */
575     if (file == NULL || buf == NULL || len < 1)
576         return NULL;
577     state = (gz_statep)file;
578     if (state->mode != GZ_READ)
579         return NULL;
580 
581     /* check that there was no (serious) error */
582     if (state->err != Z_OK && state->err != Z_BUF_ERROR && !state->again)
583         return NULL;
584     gz_error(state, Z_OK, NULL);
585 
586     /* process a skip request */
587     if (state->skip && gz_skip(state) == -1)
588         return NULL;
589 
590     /* copy output up to a new line, len-1 bytes, or there is no more output,
591        whichever comes first */
592     str = buf;
593     left = (unsigned)len - 1;
594     if (left) do {
595         /* assure that something is in the output buffer */
596         if (state->x.have == 0 && gz_fetch(state) == -1)
597             break;                      /* error */
598         if (state->x.have == 0) {       /* end of file */
599             state->past = 1;            /* read past end */
600             break;                      /* return what we have */
601         }
602 
603         /* look for end-of-line in current output buffer */
604         n = state->x.have > left ? left : state->x.have;
605         eol = (unsigned char *)memchr(state->x.next, '\n', n);
606         if (eol != NULL)
607             n = (unsigned)(eol - state->x.next) + 1;
608 
609         /* copy through end-of-line, or remainder if not found */
610         memcpy(buf, state->x.next, n);
611         state->x.have -= n;
612         state->x.next += n;
613         state->x.pos += n;
614         left -= n;
615         buf += n;
616     } while (left && eol == NULL);
617 
618     /* append a terminating zero to the string (we don't check for a zero in
619        the contents, let the user worry about that) -- return the terminated
620        string, or if nothing was read, NULL */
621     if (buf == str)
622         return NULL;
623     buf[0] = 0;
624     return str;
625 }
626 
627 /* -- see zlib.h -- */
gzdirect(gzFile file)628 int ZEXPORT gzdirect(gzFile file) {
629     gz_statep state;
630 
631     /* get internal structure */
632     if (file == NULL)
633         return 0;
634     state = (gz_statep)file;
635 
636     /* if the state is not known, but we can find out, then do so (this is
637        mainly for right after a gzopen() or gzdopen()) */
638     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
639         (void)gz_look(state);
640 
641     /* return 1 if transparent, 0 if processing a gzip stream */
642     return state->direct == 1;
643 }
644 
645 /* -- see zlib.h -- */
gzclose_r(gzFile file)646 int ZEXPORT gzclose_r(gzFile file) {
647     int ret, err;
648     gz_statep state;
649 
650     /* get internal structure and check that it's for reading */
651     if (file == NULL)
652         return Z_STREAM_ERROR;
653     state = (gz_statep)file;
654     if (state->mode != GZ_READ)
655         return Z_STREAM_ERROR;
656 
657     /* free memory and close file */
658     if (state->size) {
659         inflateEnd(&(state->strm));
660         free(state->out);
661         free(state->in);
662     }
663     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
664     gz_error(state, Z_OK, NULL);
665     free(state->path);
666     ret = close(state->fd);
667     free(state);
668     return ret ? Z_ERRNO : err;
669 }
670