1 /* inftrees.c -- generate Huffman trees for efficient decoding
2 * Copyright (C) 1995-2026 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #ifdef MAKEFIXED
7 # ifndef BUILDFIXED
8 # define BUILDFIXED
9 # endif
10 #endif
11 #ifdef BUILDFIXED
12 # define Z_ONCE
13 #endif
14
15 #include "zutil.h"
16 #include "inftrees.h"
17 #include "inflate.h"
18
19 #ifndef NULL
20 # define NULL 0
21 #endif
22
23 #define MAXBITS 15
24
25 const char inflate_copyright[] =
26 " inflate 1.3.2 Copyright 1995-2026 Mark Adler ";
27 /*
28 If you use the zlib library in a product, an acknowledgment is welcome
29 in the documentation of your product. If for some reason you cannot
30 include such an acknowledgment, I would appreciate that you keep this
31 copyright string in the executable of your product.
32 */
33
34 /*
35 Build a set of tables to decode the provided canonical Huffman code.
36 The code lengths are lens[0..codes-1]. The result starts at *table,
37 whose indices are 0..2^bits-1. work is a writable array of at least
38 lens shorts, which is used as a work area. type is the type of code
39 to be generated, CODES, LENS, or DISTS. On return, zero is success,
40 -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
41 on return points to the next available entry's address. bits is the
42 requested root table index bits, and on return it is the actual root
43 table index bits. It will differ if the request is greater than the
44 longest code or if it is less than the shortest code.
45 */
inflate_table(codetype type,unsigned short FAR * lens,unsigned codes,code FAR * FAR * table,unsigned FAR * bits,unsigned short FAR * work)46 int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
47 unsigned codes, code FAR * FAR *table,
48 unsigned FAR *bits, unsigned short FAR *work) {
49 unsigned len; /* a code's length in bits */
50 unsigned sym; /* index of code symbols */
51 unsigned min, max; /* minimum and maximum code lengths */
52 unsigned root; /* number of index bits for root table */
53 unsigned curr; /* number of index bits for current table */
54 unsigned drop; /* code bits to drop for sub-table */
55 int left; /* number of prefix codes available */
56 unsigned used; /* code entries in table used */
57 unsigned huff; /* Huffman code */
58 unsigned incr; /* for incrementing code, index */
59 unsigned fill; /* index for replicating entries */
60 unsigned low; /* low bits for current root entry */
61 unsigned mask; /* mask for low root bits */
62 code here; /* table entry for duplication */
63 code FAR *next; /* next available space in table */
64 const unsigned short FAR *base = NULL; /* base value table to use */
65 const unsigned short FAR *extra = NULL; /* extra bits table to use */
66 unsigned match = 0; /* use base and extra for symbol >= match */
67 unsigned short count[MAXBITS+1]; /* number of codes of each length */
68 unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
69 static const unsigned short lbase[31] = { /* Length codes 257..285 base */
70 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
71 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
72 static const unsigned short lext[31] = { /* Length codes 257..285 extra */
73 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
74 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 75};
75 static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
76 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
77 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
78 8193, 12289, 16385, 24577, 0, 0};
79 static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
80 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
81 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
82 28, 28, 29, 29, 64, 64};
83
84 /*
85 Process a set of code lengths to create a canonical Huffman code. The
86 code lengths are lens[0..codes-1]. Each length corresponds to the
87 symbols 0..codes-1. The Huffman code is generated by first sorting the
88 symbols by length from short to long, and retaining the symbol order
89 for codes with equal lengths. Then the code starts with all zero bits
90 for the first code of the shortest length, and the codes are integer
91 increments for the same length, and zeros are appended as the length
92 increases. For the deflate format, these bits are stored backwards
93 from their more natural integer increment ordering, and so when the
94 decoding tables are built in the large loop below, the integer codes
95 are incremented backwards.
96
97 This routine assumes, but does not check, that all of the entries in
98 lens[] are in the range 0..MAXBITS. The caller must assure this.
99 1..MAXBITS is interpreted as that code length. zero means that that
100 symbol does not occur in this code.
101
102 The codes are sorted by computing a count of codes for each length,
103 creating from that a table of starting indices for each length in the
104 sorted table, and then entering the symbols in order in the sorted
105 table. The sorted table is work[], with that space being provided by
106 the caller.
107
108 The length counts are used for other purposes as well, i.e. finding
109 the minimum and maximum length codes, determining if there are any
110 codes at all, checking for a valid set of lengths, and looking ahead
111 at length counts to determine sub-table sizes when building the
112 decoding tables.
113 */
114
115 /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
116 for (len = 0; len <= MAXBITS; len++)
117 count[len] = 0;
118 for (sym = 0; sym < codes; sym++)
119 count[lens[sym]]++;
120
121 /* bound code lengths, force root to be within code lengths */
122 root = *bits;
123 for (max = MAXBITS; max >= 1; max--)
124 if (count[max] != 0) break;
125 if (root > max) root = max;
126 if (max == 0) { /* no symbols to code at all */
127 here.op = (unsigned char)64; /* invalid code marker */
128 here.bits = (unsigned char)1;
129 here.val = (unsigned short)0;
130 *(*table)++ = here; /* make a table to force an error */
131 *(*table)++ = here;
132 *bits = 1;
133 return 0; /* no symbols, but wait for decoding to report error */
134 }
135 for (min = 1; min < max; min++)
136 if (count[min] != 0) break;
137 if (root < min) root = min;
138
139 /* check for an over-subscribed or incomplete set of lengths */
140 left = 1;
141 for (len = 1; len <= MAXBITS; len++) {
142 left <<= 1;
143 left -= count[len];
144 if (left < 0) return -1; /* over-subscribed */
145 }
146 if (left > 0 && (type == CODES || max != 1))
147 return -1; /* incomplete set */
148
149 /* generate offsets into symbol table for each length for sorting */
150 offs[1] = 0;
151 for (len = 1; len < MAXBITS; len++)
152 offs[len + 1] = offs[len] + count[len];
153
154 /* sort symbols by length, by symbol order within each length */
155 for (sym = 0; sym < codes; sym++)
156 if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
157
158 /*
159 Create and fill in decoding tables. In this loop, the table being
160 filled is at next and has curr index bits. The code being used is huff
161 with length len. That code is converted to an index by dropping drop
162 bits off of the bottom. For codes where len is less than drop + curr,
163 those top drop + curr - len bits are incremented through all values to
164 fill the table with replicated entries.
165
166 root is the number of index bits for the root table. When len exceeds
167 root, sub-tables are created pointed to by the root entry with an index
168 of the low root bits of huff. This is saved in low to check for when a
169 new sub-table should be started. drop is zero when the root table is
170 being filled, and drop is root when sub-tables are being filled.
171
172 When a new sub-table is needed, it is necessary to look ahead in the
173 code lengths to determine what size sub-table is needed. The length
174 counts are used for this, and so count[] is decremented as codes are
175 entered in the tables.
176
177 used keeps track of how many table entries have been allocated from the
178 provided *table space. It is checked for LENS and DIST tables against
179 the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
180 the initial root table size constants. See the comments in inftrees.h
181 for more information.
182
183 sym increments through all symbols, and the loop terminates when
184 all codes of length max, i.e. all codes, have been processed. This
185 routine permits incomplete codes, so another loop after this one fills
186 in the rest of the decoding tables with invalid code markers.
187 */
188
189 /* set up for code type */
190 switch (type) {
191 case CODES:
192 match = 20;
193 break;
194 case LENS:
195 base = lbase;
196 extra = lext;
197 match = 257;
198 break;
199 case DISTS:
200 base = dbase;
201 extra = dext;
202 }
203
204 /* initialize state for loop */
205 huff = 0; /* starting code */
206 sym = 0; /* starting code symbol */
207 len = min; /* starting code length */
208 next = *table; /* current table to fill in */
209 curr = root; /* current table index bits */
210 drop = 0; /* current bits to drop from code for index */
211 low = (unsigned)(-1); /* trigger new sub-table when len > root */
212 used = 1U << root; /* use root table entries */
213 mask = used - 1; /* mask for comparing low */
214
215 /* check available table space */
216 if ((type == LENS && used > ENOUGH_LENS) ||
217 (type == DISTS && used > ENOUGH_DISTS))
218 return 1;
219
220 /* process all codes and make table entries */
221 for (;;) {
222 /* create table entry */
223 here.bits = (unsigned char)(len - drop);
224 if (work[sym] + 1U < match) {
225 here.op = (unsigned char)0;
226 here.val = work[sym];
227 }
228 else if (work[sym] >= match) {
229 here.op = (unsigned char)(extra[work[sym] - match]);
230 here.val = base[work[sym] - match];
231 }
232 else {
233 here.op = (unsigned char)(32 + 64); /* end of block */
234 here.val = 0;
235 }
236
237 /* replicate for those indices with low len bits equal to huff */
238 incr = 1U << (len - drop);
239 fill = 1U << curr;
240 min = fill; /* save offset to next table */
241 do {
242 fill -= incr;
243 next[(huff >> drop) + fill] = here;
244 } while (fill != 0);
245
246 /* backwards increment the len-bit code huff */
247 incr = 1U << (len - 1);
248 while (huff & incr)
249 incr >>= 1;
250 if (incr != 0) {
251 huff &= incr - 1;
252 huff += incr;
253 }
254 else
255 huff = 0;
256
257 /* go to next symbol, update count, len */
258 sym++;
259 if (--(count[len]) == 0) {
260 if (len == max) break;
261 len = lens[work[sym]];
262 }
263
264 /* create new sub-table if needed */
265 if (len > root && (huff & mask) != low) {
266 /* if first time, transition to sub-tables */
267 if (drop == 0)
268 drop = root;
269
270 /* increment past last table */
271 next += min; /* here min is 1 << curr */
272
273 /* determine length of next table */
274 curr = len - drop;
275 left = (int)(1 << curr);
276 while (curr + drop < max) {
277 left -= count[curr + drop];
278 if (left <= 0) break;
279 curr++;
280 left <<= 1;
281 }
282
283 /* check for enough space */
284 used += 1U << curr;
285 if ((type == LENS && used > ENOUGH_LENS) ||
286 (type == DISTS && used > ENOUGH_DISTS))
287 return 1;
288
289 /* point entry in root table to sub-table */
290 low = huff & mask;
291 (*table)[low].op = (unsigned char)curr;
292 (*table)[low].bits = (unsigned char)root;
293 (*table)[low].val = (unsigned short)(next - *table);
294 }
295 }
296
297 /* fill in remaining table entry if code is incomplete (guaranteed to have
298 at most one remaining entry, since if the code is incomplete, the
299 maximum code length that was allowed to get this far is one bit) */
300 if (huff != 0) {
301 here.op = (unsigned char)64; /* invalid code marker */
302 here.bits = (unsigned char)(len - drop);
303 here.val = (unsigned short)0;
304 next[huff] = here;
305 }
306
307 /* set return parameters */
308 *table += used;
309 *bits = root;
310 return 0;
311 }
312
313 #ifdef BUILDFIXED
314 /*
315 If this is compiled with BUILDFIXED defined, and if inflate will be used in
316 multiple threads, and if atomics are not available, then inflate() must be
317 called with a fixed block (e.g. 0x03 0x00) to initialize the tables and must
318 return before any other threads are allowed to call inflate.
319 */
320
321 static code *lenfix, *distfix;
322 static code fixed[544];
323
324 /* State for z_once(). */
325 local z_once_t built = Z_ONCE_INIT;
326
buildtables(void)327 local void buildtables(void) {
328 unsigned sym, bits;
329 static code *next;
330 unsigned short lens[288], work[288];
331
332 /* literal/length table */
333 sym = 0;
334 while (sym < 144) lens[sym++] = 8;
335 while (sym < 256) lens[sym++] = 9;
336 while (sym < 280) lens[sym++] = 7;
337 while (sym < 288) lens[sym++] = 8;
338 next = fixed;
339 lenfix = next;
340 bits = 9;
341 inflate_table(LENS, lens, 288, &(next), &(bits), work);
342
343 /* distance table */
344 sym = 0;
345 while (sym < 32) lens[sym++] = 5;
346 distfix = next;
347 bits = 5;
348 inflate_table(DISTS, lens, 32, &(next), &(bits), work);
349 }
350 #else /* !BUILDFIXED */
351 # include "inffixed.h"
352 #endif /* BUILDFIXED */
353
354 /*
355 Return state with length and distance decoding tables and index sizes set to
356 fixed code decoding. Normally this returns fixed tables from inffixed.h.
357 If BUILDFIXED is defined, then instead this routine builds the tables the
358 first time it's called, and returns those tables the first time and
359 thereafter. This reduces the size of the code by about 2K bytes, in
360 exchange for a little execution time. However, BUILDFIXED should not be
361 used for threaded applications if atomics are not available, as it will
362 not be thread-safe.
363 */
inflate_fixed(struct inflate_state FAR * state)364 void inflate_fixed(struct inflate_state FAR *state) {
365 #ifdef BUILDFIXED
366 z_once(&built, buildtables);
367 #endif /* BUILDFIXED */
368 state->lencode = lenfix;
369 state->lenbits = 9;
370 state->distcode = distfix;
371 state->distbits = 5;
372 }
373
374 #ifdef MAKEFIXED
375 #include <stdio.h>
376
377 /*
378 Write out the inffixed.h that will be #include'd above. Defining MAKEFIXED
379 also defines BUILDFIXED, so the tables are built on the fly. main() writes
380 those tables to stdout, which would directed to inffixed.h. Compile this
381 along with zutil.c:
382
383 cc -DMAKEFIXED -o fix inftrees.c zutil.c
384 ./fix > inffixed.h
385 */
main(void)386 int main(void) {
387 unsigned low, size;
388 struct inflate_state state;
389
390 inflate_fixed(&state);
391 puts("/* inffixed.h -- table for decoding fixed codes");
392 puts(" * Generated automatically by makefixed().");
393 puts(" */");
394 puts("");
395 puts("/* WARNING: this file should *not* be used by applications.");
396 puts(" It is part of the implementation of this library and is");
397 puts(" subject to change. Applications should only use zlib.h.");
398 puts(" */");
399 puts("");
400 size = 1U << 9;
401 printf("static const code lenfix[%u] = {", size);
402 low = 0;
403 for (;;) {
404 if ((low % 7) == 0) printf("\n ");
405 printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
406 state.lencode[low].bits, state.lencode[low].val);
407 if (++low == size) break;
408 putchar(',');
409 }
410 puts("\n};");
411 size = 1U << 5;
412 printf("\nstatic const code distfix[%u] = {", size);
413 low = 0;
414 for (;;) {
415 if ((low % 6) == 0) printf("\n ");
416 printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
417 state.distcode[low].val);
418 if (++low == size) break;
419 putchar(',');
420 }
421 puts("\n};");
422 return 0;
423 }
424 #endif /* MAKEFIXED */
425