xref: /src/sys/contrib/zlib/crc32.c (revision 7aa1dba6b00ccfb7d66627badc8a7aaa06b02946)
1 /* crc32.c -- compute the CRC-32 of a data stream
2  * Copyright (C) 1995-2026 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  *
5  * This interleaved implementation of a CRC makes use of pipelined multiple
6  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8  */
9 
10 /* @(#) $Id$ */
11 
12 /*
13   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14   protection on the static variables used to control the first-use generation
15   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16   first call get_crc_table() to initialize the tables before allowing more than
17   one thread to use crc32().
18 
19   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20   produced, so that this one source file can be compiled to an executable.
21  */
22 
23 #ifdef MAKECRCH
24 #  include <stdio.h>
25 #  ifndef DYNAMIC_CRC_TABLE
26 #    define DYNAMIC_CRC_TABLE
27 #  endif
28 #endif
29 #ifdef DYNAMIC_CRC_TABLE
30 #  define Z_ONCE
31 #endif
32 
33 #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
34 
35 #ifdef HAVE_S390X_VX
36 #  include "contrib/crc32vx/crc32_vx_hooks.h"
37 #endif
38 
39  /*
40   A CRC of a message is computed on N braids of words in the message, where
41   each word consists of W bytes (4 or 8). If N is 3, for example, then three
42   running sparse CRCs are calculated respectively on each braid, at these
43   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
44   This is done starting at a word boundary, and continues until as many blocks
45   of N * W bytes as are available have been processed. The results are combined
46   into a single CRC at the end. For this code, N must be in the range 1..6 and
47   W must be 4 or 8. The upper limit on N can be increased if desired by adding
48   more #if blocks, extending the patterns apparent in the code. In addition,
49   crc32.h would need to be regenerated, if the maximum N value is increased.
50 
51   N and W are chosen empirically by benchmarking the execution time on a given
52   processor. The choices for N and W below were based on testing on Intel Kaby
53   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
54   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
55   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
56   They were all tested with either gcc or clang, all using the -O3 optimization
57   level. Your mileage may vary.
58  */
59 
60 /* Define N */
61 #ifdef Z_TESTN
62 #  define N Z_TESTN
63 #else
64 #  define N 5
65 #endif
66 #if N < 1 || N > 6
67 #  error N must be in 1..6
68 #endif
69 
70 /*
71   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
72   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
73   that bytes are eight bits.
74  */
75 
76 /*
77   Define W and the associated z_word_t type. If W is not defined, then a
78   braided calculation is not used, and the associated tables and code are not
79   compiled.
80  */
81 #ifdef Z_TESTW
82 #  if Z_TESTW-1 != -1
83 #    define W Z_TESTW
84 #  endif
85 #else
86 #  ifdef MAKECRCH
87 #    define W 8         /* required for MAKECRCH */
88 #  else
89 #    if defined(__x86_64__) || defined(__aarch64__)
90 #      define W 8
91 #    else
92 #      define W 4
93 #    endif
94 #  endif
95 #endif
96 #ifdef W
97 #  if W == 8 && defined(Z_U8)
98      typedef Z_U8 z_word_t;
99 #  elif defined(Z_U4)
100 #    undef W
101 #    define W 4
102      typedef Z_U4 z_word_t;
103 #  else
104 #    undef W
105 #  endif
106 #endif
107 
108 /* If available, use the ARM processor CRC32 instruction. */
109 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && \
110     defined(W) && W == 8
111 #  define ARMCRC32
112 #endif
113 
114 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
115 /*
116   Swap the bytes in a z_word_t to convert between little and big endian. Any
117   self-respecting compiler will optimize this to a single machine byte-swap
118   instruction, if one is available. This assumes that word_t is either 32 bits
119   or 64 bits.
120  */
byte_swap(z_word_t word)121 local z_word_t byte_swap(z_word_t word) {
122 #  if W == 8
123     return
124         (word & 0xff00000000000000) >> 56 |
125         (word & 0xff000000000000) >> 40 |
126         (word & 0xff0000000000) >> 24 |
127         (word & 0xff00000000) >> 8 |
128         (word & 0xff000000) << 8 |
129         (word & 0xff0000) << 24 |
130         (word & 0xff00) << 40 |
131         (word & 0xff) << 56;
132 #  else   /* W == 4 */
133     return
134         (word & 0xff000000) >> 24 |
135         (word & 0xff0000) >> 8 |
136         (word & 0xff00) << 8 |
137         (word & 0xff) << 24;
138 #  endif
139 }
140 #endif
141 
142 #ifdef DYNAMIC_CRC_TABLE
143 /* =========================================================================
144  * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
145  * below.
146  */
147    local z_crc_t FAR x2n_table[32];
148 #else
149 /* =========================================================================
150  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
151  * of x for combining CRC-32s, all made by make_crc_table().
152  */
153 #  include "crc32.h"
154 #endif
155 
156 /* CRC polynomial. */
157 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
158 
159 /*
160   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
161   reflected. For speed, this requires that a not be zero.
162  */
multmodp(uLong a,uLong b)163 local uLong multmodp(uLong a, uLong b) {
164     uLong m, p;
165 
166     m = (uLong)1 << 31;
167     p = 0;
168     for (;;) {
169         if (a & m) {
170             p ^= b;
171             if ((a & (m - 1)) == 0)
172                 break;
173         }
174         m >>= 1;
175         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
176     }
177     return p;
178 }
179 
180 /*
181   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
182   initialized. n must not be negative.
183  */
x2nmodp(z_off64_t n,unsigned k)184 local uLong x2nmodp(z_off64_t n, unsigned k) {
185     uLong p;
186 
187     p = (uLong)1 << 31;             /* x^0 == 1 */
188     while (n) {
189         if (n & 1)
190             p = multmodp(x2n_table[k & 31], p);
191         n >>= 1;
192         k++;
193     }
194     return p;
195 }
196 
197 #ifdef DYNAMIC_CRC_TABLE
198 /* =========================================================================
199  * Build the tables for byte-wise and braided CRC-32 calculations, and a table
200  * of powers of x for combining CRC-32s.
201  */
202 local z_crc_t FAR crc_table[256];
203 #ifdef W
204    local z_word_t FAR crc_big_table[256];
205    local z_crc_t FAR crc_braid_table[W][256];
206    local z_word_t FAR crc_braid_big_table[W][256];
207    local void braid(z_crc_t [][256], z_word_t [][256], int, int);
208 #endif
209 #ifdef MAKECRCH
210    local void write_table(FILE *, const z_crc_t FAR *, int);
211    local void write_table32hi(FILE *, const z_word_t FAR *, int);
212    local void write_table64(FILE *, const z_word_t FAR *, int);
213 #endif /* MAKECRCH */
214 
215 /* State for once(). */
216 local z_once_t made = Z_ONCE_INIT;
217 
218 /*
219   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
220   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
221 
222   Polynomials over GF(2) are represented in binary, one bit per coefficient,
223   with the lowest powers in the most significant bit. Then adding polynomials
224   is just exclusive-or, and multiplying a polynomial by x is a right shift by
225   one. If we call the above polynomial p, and represent a byte as the
226   polynomial q, also with the lowest power in the most significant bit (so the
227   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
228   where a mod b means the remainder after dividing a by b.
229 
230   This calculation is done using the shift-register method of multiplying and
231   taking the remainder. The register is initialized to zero, and for each
232   incoming bit, x^32 is added mod p to the register if the bit is a one (where
233   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
234   (which is shifting right by one and adding x^32 mod p if the bit shifted out
235   is a one). We start with the highest power (least significant bit) of q and
236   repeat for all eight bits of q.
237 
238   The table is simply the CRC of all possible eight bit values. This is all the
239   information needed to generate CRCs on data a byte at a time for all
240   combinations of CRC register values and incoming bytes.
241  */
242 
make_crc_table(void)243 local void make_crc_table(void) {
244     unsigned i, j, n;
245     z_crc_t p;
246 
247     /* initialize the CRC of bytes tables */
248     for (i = 0; i < 256; i++) {
249         p = i;
250         for (j = 0; j < 8; j++)
251             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
252         crc_table[i] = p;
253 #ifdef W
254         crc_big_table[i] = byte_swap(p);
255 #endif
256     }
257 
258     /* initialize the x^2^n mod p(x) table */
259     p = (z_crc_t)1 << 30;         /* x^1 */
260     x2n_table[0] = p;
261     for (n = 1; n < 32; n++)
262         x2n_table[n] = p = (z_crc_t)multmodp(p, p);
263 
264 #ifdef W
265     /* initialize the braiding tables -- needs x2n_table[] */
266     braid(crc_braid_table, crc_braid_big_table, N, W);
267 #endif
268 
269 #ifdef MAKECRCH
270     {
271         /*
272           The crc32.h header file contains tables for both 32-bit and 64-bit
273           z_word_t's, and so requires a 64-bit type be available. In that case,
274           z_word_t must be defined to be 64-bits. This code then also generates
275           and writes out the tables for the case that z_word_t is 32 bits.
276          */
277 #if !defined(W) || W != 8
278 #  error Need a 64-bit integer type in order to generate crc32.h.
279 #endif
280         FILE *out;
281         int k, n;
282         z_crc_t ltl[8][256];
283         z_word_t big[8][256];
284 
285         out = fopen("crc32.h", "w");
286         if (out == NULL) return;
287 
288         /* write out little-endian CRC table to crc32.h */
289         fprintf(out,
290             "/* crc32.h -- tables for rapid CRC calculation\n"
291             " * Generated automatically by crc32.c\n */\n"
292             "\n"
293             "local const z_crc_t FAR crc_table[] = {\n"
294             "    ");
295         write_table(out, crc_table, 256);
296         fprintf(out,
297             "};\n");
298 
299         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
300         fprintf(out,
301             "\n"
302             "#ifdef W\n"
303             "\n"
304             "#if W == 8\n"
305             "\n"
306             "local const z_word_t FAR crc_big_table[] = {\n"
307             "    ");
308         write_table64(out, crc_big_table, 256);
309         fprintf(out,
310             "};\n");
311 
312         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
313         fprintf(out,
314             "\n"
315             "#else /* W == 4 */\n"
316             "\n"
317             "local const z_word_t FAR crc_big_table[] = {\n"
318             "    ");
319         write_table32hi(out, crc_big_table, 256);
320         fprintf(out,
321             "};\n"
322             "\n"
323             "#endif\n");
324 
325         /* write out braid tables for each value of N */
326         for (n = 1; n <= 6; n++) {
327             fprintf(out,
328             "\n"
329             "#if N == %d\n", n);
330 
331             /* compute braid tables for this N and 64-bit word_t */
332             braid(ltl, big, n, 8);
333 
334             /* write out braid tables for 64-bit z_word_t to crc32.h */
335             fprintf(out,
336             "\n"
337             "#if W == 8\n"
338             "\n"
339             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
340             for (k = 0; k < 8; k++) {
341                 fprintf(out, "   {");
342                 write_table(out, ltl[k], 256);
343                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
344             }
345             fprintf(out,
346             "};\n"
347             "\n"
348             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
349             for (k = 0; k < 8; k++) {
350                 fprintf(out, "   {");
351                 write_table64(out, big[k], 256);
352                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
353             }
354             fprintf(out,
355             "};\n");
356 
357             /* compute braid tables for this N and 32-bit word_t */
358             braid(ltl, big, n, 4);
359 
360             /* write out braid tables for 32-bit z_word_t to crc32.h */
361             fprintf(out,
362             "\n"
363             "#else /* W == 4 */\n"
364             "\n"
365             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
366             for (k = 0; k < 4; k++) {
367                 fprintf(out, "   {");
368                 write_table(out, ltl[k], 256);
369                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
370             }
371             fprintf(out,
372             "};\n"
373             "\n"
374             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
375             for (k = 0; k < 4; k++) {
376                 fprintf(out, "   {");
377                 write_table32hi(out, big[k], 256);
378                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
379             }
380             fprintf(out,
381             "};\n"
382             "\n"
383             "#endif\n"
384             "\n"
385             "#endif\n");
386         }
387         fprintf(out,
388             "\n"
389             "#endif\n");
390 
391         /* write out zeros operator table to crc32.h */
392         fprintf(out,
393             "\n"
394             "local const z_crc_t FAR x2n_table[] = {\n"
395             "    ");
396         write_table(out, x2n_table, 32);
397         fprintf(out,
398             "};\n");
399         fclose(out);
400     }
401 #endif /* MAKECRCH */
402 }
403 
404 #ifdef MAKECRCH
405 
406 /*
407    Write the 32-bit values in table[0..k-1] to out, five per line in
408    hexadecimal separated by commas.
409  */
write_table(FILE * out,const z_crc_t FAR * table,int k)410 local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
411     int n;
412 
413     for (n = 0; n < k; n++)
414         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
415                 (unsigned long)(table[n]),
416                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
417 }
418 
419 /*
420    Write the high 32-bits of each value in table[0..k-1] to out, five per line
421    in hexadecimal separated by commas.
422  */
write_table32hi(FILE * out,const z_word_t FAR * table,int k)423 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
424     int n;
425 
426     for (n = 0; n < k; n++)
427         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
428                 (unsigned long)(table[n] >> 32),
429                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
430 }
431 
432 /*
433   Write the 64-bit values in table[0..k-1] to out, three per line in
434   hexadecimal separated by commas. This assumes that if there is a 64-bit
435   type, then there is also a long long integer type, and it is at least 64
436   bits. If not, then the type cast and format string can be adjusted
437   accordingly.
438  */
write_table64(FILE * out,const z_word_t FAR * table,int k)439 local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
440     int n;
441 
442     for (n = 0; n < k; n++)
443         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
444                 (unsigned long long)(table[n]),
445                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
446 }
447 
448 /* Actually do the deed. */
main(void)449 int main(void) {
450     make_crc_table();
451     return 0;
452 }
453 
454 #endif /* MAKECRCH */
455 
456 #ifdef W
457 /*
458   Generate the little and big-endian braid tables for the given n and z_word_t
459   size w. Each array must have room for w blocks of 256 elements.
460  */
braid(z_crc_t ltl[][256],z_word_t big[][256],int n,int w)461 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
462     int k;
463     z_crc_t i, p, q;
464     for (k = 0; k < w; k++) {
465         p = (z_crc_t)x2nmodp((n * w + 3 - k) << 3, 0);
466         ltl[k][0] = 0;
467         big[w - 1 - k][0] = 0;
468         for (i = 1; i < 256; i++) {
469             ltl[k][i] = q = (z_crc_t)multmodp(i << 24, p);
470             big[w - 1 - k][i] = byte_swap(q);
471         }
472     }
473 }
474 #endif
475 
476 #endif /* DYNAMIC_CRC_TABLE */
477 
478 /* =========================================================================
479  * This function can be used by asm versions of crc32(), and to force the
480  * generation of the CRC tables in a threaded application.
481  */
get_crc_table(void)482 const z_crc_t FAR * ZEXPORT get_crc_table(void) {
483 #ifdef DYNAMIC_CRC_TABLE
484     z_once(&made, make_crc_table);
485 #endif /* DYNAMIC_CRC_TABLE */
486     return (const z_crc_t FAR *)crc_table;
487 }
488 
489 /* =========================================================================
490  * Use ARM machine instructions if available. This will compute the CRC about
491  * ten times faster than the braided calculation. This code does not check for
492  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
493  * only be defined if the compilation specifies an ARM processor architecture
494  * that has the instructions. For example, compiling with -march=armv8.1-a or
495  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
496  * instructions.
497  */
498 #ifdef ARMCRC32
499 
500 /*
501    Constants empirically determined to maximize speed. These values are from
502    measurements on a Cortex-A57. Your mileage may vary.
503  */
504 #define Z_BATCH 3990                /* number of words in a batch */
505 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
506 #define Z_BATCH_MIN 800             /* fewest words in a final batch */
507 
crc32_z(uLong crc,const unsigned char FAR * buf,z_size_t len)508 uLong ZEXPORT crc32_z(uLong crc, const unsigned char FAR *buf, z_size_t len) {
509     uLong val;
510     z_word_t crc1, crc2;
511     const z_word_t *word;
512     z_word_t val0, val1, val2;
513     z_size_t last, last2, i;
514     z_size_t num;
515 
516     /* Return initial CRC, if requested. */
517     if (buf == Z_NULL) return 0;
518 
519 #ifdef DYNAMIC_CRC_TABLE
520     z_once(&made, make_crc_table);
521 #endif /* DYNAMIC_CRC_TABLE */
522 
523     /* Pre-condition the CRC */
524     crc = (~crc) & 0xffffffff;
525 
526     /* Compute the CRC up to a word boundary. */
527     while (len && ((z_size_t)buf & 7) != 0) {
528         len--;
529         val = *buf++;
530         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
531     }
532 
533     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
534     word = (z_word_t const *)buf;
535     num = len >> 3;
536     len &= 7;
537 
538     /* Do three interleaved CRCs to realize the throughput of one crc32x
539        instruction per cycle. Each CRC is calculated on Z_BATCH words. The
540        three CRCs are combined into a single CRC after each set of batches. */
541     while (num >= 3 * Z_BATCH) {
542         crc1 = 0;
543         crc2 = 0;
544         for (i = 0; i < Z_BATCH; i++) {
545             val0 = word[i];
546             val1 = word[i + Z_BATCH];
547             val2 = word[i + 2 * Z_BATCH];
548             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
549             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
550             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
551         }
552         word += 3 * Z_BATCH;
553         num -= 3 * Z_BATCH;
554         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
555         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
556     }
557 
558     /* Do one last smaller batch with the remaining words, if there are enough
559        to pay for the combination of CRCs. */
560     last = num / 3;
561     if (last >= Z_BATCH_MIN) {
562         last2 = last << 1;
563         crc1 = 0;
564         crc2 = 0;
565         for (i = 0; i < last; i++) {
566             val0 = word[i];
567             val1 = word[i + last];
568             val2 = word[i + last2];
569             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
570             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
571             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
572         }
573         word += 3 * last;
574         num -= 3 * last;
575         val = x2nmodp((int)last, 6);
576         crc = multmodp(val, crc) ^ crc1;
577         crc = multmodp(val, crc) ^ crc2;
578     }
579 
580     /* Compute the CRC on any remaining words. */
581     for (i = 0; i < num; i++) {
582         val0 = word[i];
583         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
584     }
585     word += num;
586 
587     /* Complete the CRC on any remaining bytes. */
588     buf = (const unsigned char FAR *)word;
589     while (len) {
590         len--;
591         val = *buf++;
592         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
593     }
594 
595     /* Return the CRC, post-conditioned. */
596     return crc ^ 0xffffffff;
597 }
598 
599 #else
600 
601 #ifdef W
602 
603 /*
604   Return the CRC of the W bytes in the word_t data, taking the
605   least-significant byte of the word as the first byte of data, without any pre
606   or post conditioning. This is used to combine the CRCs of each braid.
607  */
crc_word(z_word_t data)608 local z_crc_t crc_word(z_word_t data) {
609     int k;
610     for (k = 0; k < W; k++)
611         data = (data >> 8) ^ crc_table[data & 0xff];
612     return (z_crc_t)data;
613 }
614 
crc_word_big(z_word_t data)615 local z_word_t crc_word_big(z_word_t data) {
616     int k;
617     for (k = 0; k < W; k++)
618         data = (data << 8) ^
619             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
620     return data;
621 }
622 
623 #endif
624 
625 /* ========================================================================= */
crc32_z(uLong crc,const unsigned char FAR * buf,z_size_t len)626 uLong ZEXPORT crc32_z(uLong crc, const unsigned char FAR *buf, z_size_t len) {
627     /* Return initial CRC, if requested. */
628     if (buf == Z_NULL) return 0;
629 
630 #ifdef DYNAMIC_CRC_TABLE
631     z_once(&made, make_crc_table);
632 #endif /* DYNAMIC_CRC_TABLE */
633 
634     /* Pre-condition the CRC */
635     crc = (~crc) & 0xffffffff;
636 
637 #ifdef W
638 
639     /* If provided enough bytes, do a braided CRC calculation. */
640     if (len >= N * W + W - 1) {
641         z_size_t blks;
642         z_word_t const *words;
643         unsigned endian;
644         int k;
645 
646         /* Compute the CRC up to a z_word_t boundary. */
647         while (len && ((z_size_t)buf & (W - 1)) != 0) {
648             len--;
649             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
650         }
651 
652         /* Compute the CRC on as many N z_word_t blocks as are available. */
653         blks = len / (N * W);
654         len -= blks * N * W;
655         words = (z_word_t const *)buf;
656 
657         /* Do endian check at execution time instead of compile time, since ARM
658            processors can change the endianness at execution time. If the
659            compiler knows what the endianness will be, it can optimize out the
660            check and the unused branch. */
661         endian = 1;
662         if (*(unsigned char *)&endian) {
663             /* Little endian. */
664 
665             z_crc_t crc0;
666             z_word_t word0;
667 #if N > 1
668             z_crc_t crc1;
669             z_word_t word1;
670 #if N > 2
671             z_crc_t crc2;
672             z_word_t word2;
673 #if N > 3
674             z_crc_t crc3;
675             z_word_t word3;
676 #if N > 4
677             z_crc_t crc4;
678             z_word_t word4;
679 #if N > 5
680             z_crc_t crc5;
681             z_word_t word5;
682 #endif
683 #endif
684 #endif
685 #endif
686 #endif
687 
688             /* Initialize the CRC for each braid. */
689             crc0 = crc;
690 #if N > 1
691             crc1 = 0;
692 #if N > 2
693             crc2 = 0;
694 #if N > 3
695             crc3 = 0;
696 #if N > 4
697             crc4 = 0;
698 #if N > 5
699             crc5 = 0;
700 #endif
701 #endif
702 #endif
703 #endif
704 #endif
705 
706             /*
707               Process the first blks-1 blocks, computing the CRCs on each braid
708               independently.
709              */
710             while (--blks) {
711                 /* Load the word for each braid into registers. */
712                 word0 = crc0 ^ words[0];
713 #if N > 1
714                 word1 = crc1 ^ words[1];
715 #if N > 2
716                 word2 = crc2 ^ words[2];
717 #if N > 3
718                 word3 = crc3 ^ words[3];
719 #if N > 4
720                 word4 = crc4 ^ words[4];
721 #if N > 5
722                 word5 = crc5 ^ words[5];
723 #endif
724 #endif
725 #endif
726 #endif
727 #endif
728                 words += N;
729 
730                 /* Compute and update the CRC for each word. The loop should
731                    get unrolled. */
732                 crc0 = crc_braid_table[0][word0 & 0xff];
733 #if N > 1
734                 crc1 = crc_braid_table[0][word1 & 0xff];
735 #if N > 2
736                 crc2 = crc_braid_table[0][word2 & 0xff];
737 #if N > 3
738                 crc3 = crc_braid_table[0][word3 & 0xff];
739 #if N > 4
740                 crc4 = crc_braid_table[0][word4 & 0xff];
741 #if N > 5
742                 crc5 = crc_braid_table[0][word5 & 0xff];
743 #endif
744 #endif
745 #endif
746 #endif
747 #endif
748                 for (k = 1; k < W; k++) {
749                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
750 #if N > 1
751                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
752 #if N > 2
753                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
754 #if N > 3
755                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
756 #if N > 4
757                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
758 #if N > 5
759                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
760 #endif
761 #endif
762 #endif
763 #endif
764 #endif
765                 }
766             }
767 
768             /*
769               Process the last block, combining the CRCs of the N braids at the
770               same time.
771              */
772             crc = crc_word(crc0 ^ words[0]);
773 #if N > 1
774             crc = crc_word(crc1 ^ words[1] ^ crc);
775 #if N > 2
776             crc = crc_word(crc2 ^ words[2] ^ crc);
777 #if N > 3
778             crc = crc_word(crc3 ^ words[3] ^ crc);
779 #if N > 4
780             crc = crc_word(crc4 ^ words[4] ^ crc);
781 #if N > 5
782             crc = crc_word(crc5 ^ words[5] ^ crc);
783 #endif
784 #endif
785 #endif
786 #endif
787 #endif
788             words += N;
789         }
790         else {
791             /* Big endian. */
792 
793             z_word_t crc0, word0, comb;
794 #if N > 1
795             z_word_t crc1, word1;
796 #if N > 2
797             z_word_t crc2, word2;
798 #if N > 3
799             z_word_t crc3, word3;
800 #if N > 4
801             z_word_t crc4, word4;
802 #if N > 5
803             z_word_t crc5, word5;
804 #endif
805 #endif
806 #endif
807 #endif
808 #endif
809 
810             /* Initialize the CRC for each braid. */
811             crc0 = byte_swap(crc);
812 #if N > 1
813             crc1 = 0;
814 #if N > 2
815             crc2 = 0;
816 #if N > 3
817             crc3 = 0;
818 #if N > 4
819             crc4 = 0;
820 #if N > 5
821             crc5 = 0;
822 #endif
823 #endif
824 #endif
825 #endif
826 #endif
827 
828             /*
829               Process the first blks-1 blocks, computing the CRCs on each braid
830               independently.
831              */
832             while (--blks) {
833                 /* Load the word for each braid into registers. */
834                 word0 = crc0 ^ words[0];
835 #if N > 1
836                 word1 = crc1 ^ words[1];
837 #if N > 2
838                 word2 = crc2 ^ words[2];
839 #if N > 3
840                 word3 = crc3 ^ words[3];
841 #if N > 4
842                 word4 = crc4 ^ words[4];
843 #if N > 5
844                 word5 = crc5 ^ words[5];
845 #endif
846 #endif
847 #endif
848 #endif
849 #endif
850                 words += N;
851 
852                 /* Compute and update the CRC for each word. The loop should
853                    get unrolled. */
854                 crc0 = crc_braid_big_table[0][word0 & 0xff];
855 #if N > 1
856                 crc1 = crc_braid_big_table[0][word1 & 0xff];
857 #if N > 2
858                 crc2 = crc_braid_big_table[0][word2 & 0xff];
859 #if N > 3
860                 crc3 = crc_braid_big_table[0][word3 & 0xff];
861 #if N > 4
862                 crc4 = crc_braid_big_table[0][word4 & 0xff];
863 #if N > 5
864                 crc5 = crc_braid_big_table[0][word5 & 0xff];
865 #endif
866 #endif
867 #endif
868 #endif
869 #endif
870                 for (k = 1; k < W; k++) {
871                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
872 #if N > 1
873                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
874 #if N > 2
875                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
876 #if N > 3
877                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
878 #if N > 4
879                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
880 #if N > 5
881                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
882 #endif
883 #endif
884 #endif
885 #endif
886 #endif
887                 }
888             }
889 
890             /*
891               Process the last block, combining the CRCs of the N braids at the
892               same time.
893              */
894             comb = crc_word_big(crc0 ^ words[0]);
895 #if N > 1
896             comb = crc_word_big(crc1 ^ words[1] ^ comb);
897 #if N > 2
898             comb = crc_word_big(crc2 ^ words[2] ^ comb);
899 #if N > 3
900             comb = crc_word_big(crc3 ^ words[3] ^ comb);
901 #if N > 4
902             comb = crc_word_big(crc4 ^ words[4] ^ comb);
903 #if N > 5
904             comb = crc_word_big(crc5 ^ words[5] ^ comb);
905 #endif
906 #endif
907 #endif
908 #endif
909 #endif
910             words += N;
911             crc = byte_swap(comb);
912         }
913 
914         /*
915           Update the pointer to the remaining bytes to process.
916          */
917         buf = (unsigned char const *)words;
918     }
919 
920 #endif /* W */
921 
922     /* Complete the computation of the CRC on any remaining bytes. */
923     while (len >= 8) {
924         len -= 8;
925         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
926         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
927         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
928         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
929         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
930         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
931         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
932         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
933     }
934     while (len) {
935         len--;
936         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
937     }
938 
939     /* Return the CRC, post-conditioned. */
940     return crc ^ 0xffffffff;
941 }
942 
943 #endif
944 
945 /* ========================================================================= */
crc32(uLong crc,const unsigned char FAR * buf,uInt len)946 uLong ZEXPORT crc32(uLong crc, const unsigned char FAR *buf, uInt len) {
947     #ifdef HAVE_S390X_VX
948     return crc32_z_hook(crc, buf, len);
949     #endif
950     return crc32_z(crc, buf, len);
951 }
952 
953 /* ========================================================================= */
crc32_combine_gen64(z_off64_t len2)954 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
955     if (len2 < 0)
956         return 0;
957 #ifdef DYNAMIC_CRC_TABLE
958     z_once(&made, make_crc_table);
959 #endif /* DYNAMIC_CRC_TABLE */
960     return x2nmodp(len2, 3);
961 }
962 
963 /* ========================================================================= */
crc32_combine_gen(z_off_t len2)964 uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
965     return crc32_combine_gen64((z_off64_t)len2);
966 }
967 
968 /* ========================================================================= */
crc32_combine_op(uLong crc1,uLong crc2,uLong op)969 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
970     if (op == 0)
971         return 0;
972     return multmodp(op, crc1 & 0xffffffff) ^ (crc2 & 0xffffffff);
973 }
974 
975 /* ========================================================================= */
crc32_combine64(uLong crc1,uLong crc2,z_off64_t len2)976 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
977     return crc32_combine_op(crc1, crc2, crc32_combine_gen64(len2));
978 }
979 
980 /* ========================================================================= */
crc32_combine(uLong crc1,uLong crc2,z_off_t len2)981 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
982     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
983 }
984