1 /* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2026 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * This interleaved implementation of a CRC makes use of pipelined multiple
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8 */
9
10 /* @(#) $Id$ */
11
12 /*
13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14 protection on the static variables used to control the first-use generation
15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16 first call get_crc_table() to initialize the tables before allowing more than
17 one thread to use crc32().
18
19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20 produced, so that this one source file can be compiled to an executable.
21 */
22
23 #ifdef MAKECRCH
24 # include <stdio.h>
25 # ifndef DYNAMIC_CRC_TABLE
26 # define DYNAMIC_CRC_TABLE
27 # endif
28 #endif
29 #ifdef DYNAMIC_CRC_TABLE
30 # define Z_ONCE
31 #endif
32
33 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
34
35 #ifdef HAVE_S390X_VX
36 # include "contrib/crc32vx/crc32_vx_hooks.h"
37 #endif
38
39 /*
40 A CRC of a message is computed on N braids of words in the message, where
41 each word consists of W bytes (4 or 8). If N is 3, for example, then three
42 running sparse CRCs are calculated respectively on each braid, at these
43 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
44 This is done starting at a word boundary, and continues until as many blocks
45 of N * W bytes as are available have been processed. The results are combined
46 into a single CRC at the end. For this code, N must be in the range 1..6 and
47 W must be 4 or 8. The upper limit on N can be increased if desired by adding
48 more #if blocks, extending the patterns apparent in the code. In addition,
49 crc32.h would need to be regenerated, if the maximum N value is increased.
50
51 N and W are chosen empirically by benchmarking the execution time on a given
52 processor. The choices for N and W below were based on testing on Intel Kaby
53 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
54 Octeon II processors. The Intel, AMD, and ARM processors were all fastest
55 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
56 They were all tested with either gcc or clang, all using the -O3 optimization
57 level. Your mileage may vary.
58 */
59
60 /* Define N */
61 #ifdef Z_TESTN
62 # define N Z_TESTN
63 #else
64 # define N 5
65 #endif
66 #if N < 1 || N > 6
67 # error N must be in 1..6
68 #endif
69
70 /*
71 z_crc_t must be at least 32 bits. z_word_t must be at least as long as
72 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
73 that bytes are eight bits.
74 */
75
76 /*
77 Define W and the associated z_word_t type. If W is not defined, then a
78 braided calculation is not used, and the associated tables and code are not
79 compiled.
80 */
81 #ifdef Z_TESTW
82 # if Z_TESTW-1 != -1
83 # define W Z_TESTW
84 # endif
85 #else
86 # ifdef MAKECRCH
87 # define W 8 /* required for MAKECRCH */
88 # else
89 # if defined(__x86_64__) || defined(__aarch64__)
90 # define W 8
91 # else
92 # define W 4
93 # endif
94 # endif
95 #endif
96 #ifdef W
97 # if W == 8 && defined(Z_U8)
98 typedef Z_U8 z_word_t;
99 # elif defined(Z_U4)
100 # undef W
101 # define W 4
102 typedef Z_U4 z_word_t;
103 # else
104 # undef W
105 # endif
106 #endif
107
108 /* If available, use the ARM processor CRC32 instruction. */
109 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && \
110 defined(W) && W == 8
111 # define ARMCRC32
112 #endif
113
114 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
115 /*
116 Swap the bytes in a z_word_t to convert between little and big endian. Any
117 self-respecting compiler will optimize this to a single machine byte-swap
118 instruction, if one is available. This assumes that word_t is either 32 bits
119 or 64 bits.
120 */
byte_swap(z_word_t word)121 local z_word_t byte_swap(z_word_t word) {
122 # if W == 8
123 return
124 (word & 0xff00000000000000) >> 56 |
125 (word & 0xff000000000000) >> 40 |
126 (word & 0xff0000000000) >> 24 |
127 (word & 0xff00000000) >> 8 |
128 (word & 0xff000000) << 8 |
129 (word & 0xff0000) << 24 |
130 (word & 0xff00) << 40 |
131 (word & 0xff) << 56;
132 # else /* W == 4 */
133 return
134 (word & 0xff000000) >> 24 |
135 (word & 0xff0000) >> 8 |
136 (word & 0xff00) << 8 |
137 (word & 0xff) << 24;
138 # endif
139 }
140 #endif
141
142 #ifdef DYNAMIC_CRC_TABLE
143 /* =========================================================================
144 * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
145 * below.
146 */
147 local z_crc_t FAR x2n_table[32];
148 #else
149 /* =========================================================================
150 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
151 * of x for combining CRC-32s, all made by make_crc_table().
152 */
153 # include "crc32.h"
154 #endif
155
156 /* CRC polynomial. */
157 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
158
159 /*
160 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
161 reflected. For speed, this requires that a not be zero.
162 */
multmodp(uLong a,uLong b)163 local uLong multmodp(uLong a, uLong b) {
164 uLong m, p;
165
166 m = (uLong)1 << 31;
167 p = 0;
168 for (;;) {
169 if (a & m) {
170 p ^= b;
171 if ((a & (m - 1)) == 0)
172 break;
173 }
174 m >>= 1;
175 b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
176 }
177 return p;
178 }
179
180 /*
181 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
182 initialized. n must not be negative.
183 */
x2nmodp(z_off64_t n,unsigned k)184 local uLong x2nmodp(z_off64_t n, unsigned k) {
185 uLong p;
186
187 p = (uLong)1 << 31; /* x^0 == 1 */
188 while (n) {
189 if (n & 1)
190 p = multmodp(x2n_table[k & 31], p);
191 n >>= 1;
192 k++;
193 }
194 return p;
195 }
196
197 #ifdef DYNAMIC_CRC_TABLE
198 /* =========================================================================
199 * Build the tables for byte-wise and braided CRC-32 calculations, and a table
200 * of powers of x for combining CRC-32s.
201 */
202 local z_crc_t FAR crc_table[256];
203 #ifdef W
204 local z_word_t FAR crc_big_table[256];
205 local z_crc_t FAR crc_braid_table[W][256];
206 local z_word_t FAR crc_braid_big_table[W][256];
207 local void braid(z_crc_t [][256], z_word_t [][256], int, int);
208 #endif
209 #ifdef MAKECRCH
210 local void write_table(FILE *, const z_crc_t FAR *, int);
211 local void write_table32hi(FILE *, const z_word_t FAR *, int);
212 local void write_table64(FILE *, const z_word_t FAR *, int);
213 #endif /* MAKECRCH */
214
215 /* State for once(). */
216 local z_once_t made = Z_ONCE_INIT;
217
218 /*
219 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
220 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
221
222 Polynomials over GF(2) are represented in binary, one bit per coefficient,
223 with the lowest powers in the most significant bit. Then adding polynomials
224 is just exclusive-or, and multiplying a polynomial by x is a right shift by
225 one. If we call the above polynomial p, and represent a byte as the
226 polynomial q, also with the lowest power in the most significant bit (so the
227 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
228 where a mod b means the remainder after dividing a by b.
229
230 This calculation is done using the shift-register method of multiplying and
231 taking the remainder. The register is initialized to zero, and for each
232 incoming bit, x^32 is added mod p to the register if the bit is a one (where
233 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
234 (which is shifting right by one and adding x^32 mod p if the bit shifted out
235 is a one). We start with the highest power (least significant bit) of q and
236 repeat for all eight bits of q.
237
238 The table is simply the CRC of all possible eight bit values. This is all the
239 information needed to generate CRCs on data a byte at a time for all
240 combinations of CRC register values and incoming bytes.
241 */
242
make_crc_table(void)243 local void make_crc_table(void) {
244 unsigned i, j, n;
245 z_crc_t p;
246
247 /* initialize the CRC of bytes tables */
248 for (i = 0; i < 256; i++) {
249 p = i;
250 for (j = 0; j < 8; j++)
251 p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
252 crc_table[i] = p;
253 #ifdef W
254 crc_big_table[i] = byte_swap(p);
255 #endif
256 }
257
258 /* initialize the x^2^n mod p(x) table */
259 p = (z_crc_t)1 << 30; /* x^1 */
260 x2n_table[0] = p;
261 for (n = 1; n < 32; n++)
262 x2n_table[n] = p = (z_crc_t)multmodp(p, p);
263
264 #ifdef W
265 /* initialize the braiding tables -- needs x2n_table[] */
266 braid(crc_braid_table, crc_braid_big_table, N, W);
267 #endif
268
269 #ifdef MAKECRCH
270 {
271 /*
272 The crc32.h header file contains tables for both 32-bit and 64-bit
273 z_word_t's, and so requires a 64-bit type be available. In that case,
274 z_word_t must be defined to be 64-bits. This code then also generates
275 and writes out the tables for the case that z_word_t is 32 bits.
276 */
277 #if !defined(W) || W != 8
278 # error Need a 64-bit integer type in order to generate crc32.h.
279 #endif
280 FILE *out;
281 int k, n;
282 z_crc_t ltl[8][256];
283 z_word_t big[8][256];
284
285 out = fopen("crc32.h", "w");
286 if (out == NULL) return;
287
288 /* write out little-endian CRC table to crc32.h */
289 fprintf(out,
290 "/* crc32.h -- tables for rapid CRC calculation\n"
291 " * Generated automatically by crc32.c\n */\n"
292 "\n"
293 "local const z_crc_t FAR crc_table[] = {\n"
294 " ");
295 write_table(out, crc_table, 256);
296 fprintf(out,
297 "};\n");
298
299 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
300 fprintf(out,
301 "\n"
302 "#ifdef W\n"
303 "\n"
304 "#if W == 8\n"
305 "\n"
306 "local const z_word_t FAR crc_big_table[] = {\n"
307 " ");
308 write_table64(out, crc_big_table, 256);
309 fprintf(out,
310 "};\n");
311
312 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
313 fprintf(out,
314 "\n"
315 "#else /* W == 4 */\n"
316 "\n"
317 "local const z_word_t FAR crc_big_table[] = {\n"
318 " ");
319 write_table32hi(out, crc_big_table, 256);
320 fprintf(out,
321 "};\n"
322 "\n"
323 "#endif\n");
324
325 /* write out braid tables for each value of N */
326 for (n = 1; n <= 6; n++) {
327 fprintf(out,
328 "\n"
329 "#if N == %d\n", n);
330
331 /* compute braid tables for this N and 64-bit word_t */
332 braid(ltl, big, n, 8);
333
334 /* write out braid tables for 64-bit z_word_t to crc32.h */
335 fprintf(out,
336 "\n"
337 "#if W == 8\n"
338 "\n"
339 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
340 for (k = 0; k < 8; k++) {
341 fprintf(out, " {");
342 write_table(out, ltl[k], 256);
343 fprintf(out, "}%s", k < 7 ? ",\n" : "");
344 }
345 fprintf(out,
346 "};\n"
347 "\n"
348 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
349 for (k = 0; k < 8; k++) {
350 fprintf(out, " {");
351 write_table64(out, big[k], 256);
352 fprintf(out, "}%s", k < 7 ? ",\n" : "");
353 }
354 fprintf(out,
355 "};\n");
356
357 /* compute braid tables for this N and 32-bit word_t */
358 braid(ltl, big, n, 4);
359
360 /* write out braid tables for 32-bit z_word_t to crc32.h */
361 fprintf(out,
362 "\n"
363 "#else /* W == 4 */\n"
364 "\n"
365 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
366 for (k = 0; k < 4; k++) {
367 fprintf(out, " {");
368 write_table(out, ltl[k], 256);
369 fprintf(out, "}%s", k < 3 ? ",\n" : "");
370 }
371 fprintf(out,
372 "};\n"
373 "\n"
374 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
375 for (k = 0; k < 4; k++) {
376 fprintf(out, " {");
377 write_table32hi(out, big[k], 256);
378 fprintf(out, "}%s", k < 3 ? ",\n" : "");
379 }
380 fprintf(out,
381 "};\n"
382 "\n"
383 "#endif\n"
384 "\n"
385 "#endif\n");
386 }
387 fprintf(out,
388 "\n"
389 "#endif\n");
390
391 /* write out zeros operator table to crc32.h */
392 fprintf(out,
393 "\n"
394 "local const z_crc_t FAR x2n_table[] = {\n"
395 " ");
396 write_table(out, x2n_table, 32);
397 fprintf(out,
398 "};\n");
399 fclose(out);
400 }
401 #endif /* MAKECRCH */
402 }
403
404 #ifdef MAKECRCH
405
406 /*
407 Write the 32-bit values in table[0..k-1] to out, five per line in
408 hexadecimal separated by commas.
409 */
write_table(FILE * out,const z_crc_t FAR * table,int k)410 local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
411 int n;
412
413 for (n = 0; n < k; n++)
414 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
415 (unsigned long)(table[n]),
416 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
417 }
418
419 /*
420 Write the high 32-bits of each value in table[0..k-1] to out, five per line
421 in hexadecimal separated by commas.
422 */
write_table32hi(FILE * out,const z_word_t FAR * table,int k)423 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
424 int n;
425
426 for (n = 0; n < k; n++)
427 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
428 (unsigned long)(table[n] >> 32),
429 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
430 }
431
432 /*
433 Write the 64-bit values in table[0..k-1] to out, three per line in
434 hexadecimal separated by commas. This assumes that if there is a 64-bit
435 type, then there is also a long long integer type, and it is at least 64
436 bits. If not, then the type cast and format string can be adjusted
437 accordingly.
438 */
write_table64(FILE * out,const z_word_t FAR * table,int k)439 local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
440 int n;
441
442 for (n = 0; n < k; n++)
443 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
444 (unsigned long long)(table[n]),
445 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
446 }
447
448 /* Actually do the deed. */
main(void)449 int main(void) {
450 make_crc_table();
451 return 0;
452 }
453
454 #endif /* MAKECRCH */
455
456 #ifdef W
457 /*
458 Generate the little and big-endian braid tables for the given n and z_word_t
459 size w. Each array must have room for w blocks of 256 elements.
460 */
braid(z_crc_t ltl[][256],z_word_t big[][256],int n,int w)461 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
462 int k;
463 z_crc_t i, p, q;
464 for (k = 0; k < w; k++) {
465 p = (z_crc_t)x2nmodp((n * w + 3 - k) << 3, 0);
466 ltl[k][0] = 0;
467 big[w - 1 - k][0] = 0;
468 for (i = 1; i < 256; i++) {
469 ltl[k][i] = q = (z_crc_t)multmodp(i << 24, p);
470 big[w - 1 - k][i] = byte_swap(q);
471 }
472 }
473 }
474 #endif
475
476 #endif /* DYNAMIC_CRC_TABLE */
477
478 /* =========================================================================
479 * This function can be used by asm versions of crc32(), and to force the
480 * generation of the CRC tables in a threaded application.
481 */
get_crc_table(void)482 const z_crc_t FAR * ZEXPORT get_crc_table(void) {
483 #ifdef DYNAMIC_CRC_TABLE
484 z_once(&made, make_crc_table);
485 #endif /* DYNAMIC_CRC_TABLE */
486 return (const z_crc_t FAR *)crc_table;
487 }
488
489 /* =========================================================================
490 * Use ARM machine instructions if available. This will compute the CRC about
491 * ten times faster than the braided calculation. This code does not check for
492 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
493 * only be defined if the compilation specifies an ARM processor architecture
494 * that has the instructions. For example, compiling with -march=armv8.1-a or
495 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
496 * instructions.
497 */
498 #ifdef ARMCRC32
499
500 /*
501 Constants empirically determined to maximize speed. These values are from
502 measurements on a Cortex-A57. Your mileage may vary.
503 */
504 #define Z_BATCH 3990 /* number of words in a batch */
505 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
506 #define Z_BATCH_MIN 800 /* fewest words in a final batch */
507
crc32_z(uLong crc,const unsigned char FAR * buf,z_size_t len)508 uLong ZEXPORT crc32_z(uLong crc, const unsigned char FAR *buf, z_size_t len) {
509 uLong val;
510 z_word_t crc1, crc2;
511 const z_word_t *word;
512 z_word_t val0, val1, val2;
513 z_size_t last, last2, i;
514 z_size_t num;
515
516 /* Return initial CRC, if requested. */
517 if (buf == Z_NULL) return 0;
518
519 #ifdef DYNAMIC_CRC_TABLE
520 z_once(&made, make_crc_table);
521 #endif /* DYNAMIC_CRC_TABLE */
522
523 /* Pre-condition the CRC */
524 crc = (~crc) & 0xffffffff;
525
526 /* Compute the CRC up to a word boundary. */
527 while (len && ((z_size_t)buf & 7) != 0) {
528 len--;
529 val = *buf++;
530 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
531 }
532
533 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
534 word = (z_word_t const *)buf;
535 num = len >> 3;
536 len &= 7;
537
538 /* Do three interleaved CRCs to realize the throughput of one crc32x
539 instruction per cycle. Each CRC is calculated on Z_BATCH words. The
540 three CRCs are combined into a single CRC after each set of batches. */
541 while (num >= 3 * Z_BATCH) {
542 crc1 = 0;
543 crc2 = 0;
544 for (i = 0; i < Z_BATCH; i++) {
545 val0 = word[i];
546 val1 = word[i + Z_BATCH];
547 val2 = word[i + 2 * Z_BATCH];
548 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
549 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
550 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
551 }
552 word += 3 * Z_BATCH;
553 num -= 3 * Z_BATCH;
554 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
555 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
556 }
557
558 /* Do one last smaller batch with the remaining words, if there are enough
559 to pay for the combination of CRCs. */
560 last = num / 3;
561 if (last >= Z_BATCH_MIN) {
562 last2 = last << 1;
563 crc1 = 0;
564 crc2 = 0;
565 for (i = 0; i < last; i++) {
566 val0 = word[i];
567 val1 = word[i + last];
568 val2 = word[i + last2];
569 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
570 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
571 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
572 }
573 word += 3 * last;
574 num -= 3 * last;
575 val = x2nmodp((int)last, 6);
576 crc = multmodp(val, crc) ^ crc1;
577 crc = multmodp(val, crc) ^ crc2;
578 }
579
580 /* Compute the CRC on any remaining words. */
581 for (i = 0; i < num; i++) {
582 val0 = word[i];
583 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
584 }
585 word += num;
586
587 /* Complete the CRC on any remaining bytes. */
588 buf = (const unsigned char FAR *)word;
589 while (len) {
590 len--;
591 val = *buf++;
592 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
593 }
594
595 /* Return the CRC, post-conditioned. */
596 return crc ^ 0xffffffff;
597 }
598
599 #else
600
601 #ifdef W
602
603 /*
604 Return the CRC of the W bytes in the word_t data, taking the
605 least-significant byte of the word as the first byte of data, without any pre
606 or post conditioning. This is used to combine the CRCs of each braid.
607 */
crc_word(z_word_t data)608 local z_crc_t crc_word(z_word_t data) {
609 int k;
610 for (k = 0; k < W; k++)
611 data = (data >> 8) ^ crc_table[data & 0xff];
612 return (z_crc_t)data;
613 }
614
crc_word_big(z_word_t data)615 local z_word_t crc_word_big(z_word_t data) {
616 int k;
617 for (k = 0; k < W; k++)
618 data = (data << 8) ^
619 crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
620 return data;
621 }
622
623 #endif
624
625 /* ========================================================================= */
crc32_z(uLong crc,const unsigned char FAR * buf,z_size_t len)626 uLong ZEXPORT crc32_z(uLong crc, const unsigned char FAR *buf, z_size_t len) {
627 /* Return initial CRC, if requested. */
628 if (buf == Z_NULL) return 0;
629
630 #ifdef DYNAMIC_CRC_TABLE
631 z_once(&made, make_crc_table);
632 #endif /* DYNAMIC_CRC_TABLE */
633
634 /* Pre-condition the CRC */
635 crc = (~crc) & 0xffffffff;
636
637 #ifdef W
638
639 /* If provided enough bytes, do a braided CRC calculation. */
640 if (len >= N * W + W - 1) {
641 z_size_t blks;
642 z_word_t const *words;
643 unsigned endian;
644 int k;
645
646 /* Compute the CRC up to a z_word_t boundary. */
647 while (len && ((z_size_t)buf & (W - 1)) != 0) {
648 len--;
649 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
650 }
651
652 /* Compute the CRC on as many N z_word_t blocks as are available. */
653 blks = len / (N * W);
654 len -= blks * N * W;
655 words = (z_word_t const *)buf;
656
657 /* Do endian check at execution time instead of compile time, since ARM
658 processors can change the endianness at execution time. If the
659 compiler knows what the endianness will be, it can optimize out the
660 check and the unused branch. */
661 endian = 1;
662 if (*(unsigned char *)&endian) {
663 /* Little endian. */
664
665 z_crc_t crc0;
666 z_word_t word0;
667 #if N > 1
668 z_crc_t crc1;
669 z_word_t word1;
670 #if N > 2
671 z_crc_t crc2;
672 z_word_t word2;
673 #if N > 3
674 z_crc_t crc3;
675 z_word_t word3;
676 #if N > 4
677 z_crc_t crc4;
678 z_word_t word4;
679 #if N > 5
680 z_crc_t crc5;
681 z_word_t word5;
682 #endif
683 #endif
684 #endif
685 #endif
686 #endif
687
688 /* Initialize the CRC for each braid. */
689 crc0 = crc;
690 #if N > 1
691 crc1 = 0;
692 #if N > 2
693 crc2 = 0;
694 #if N > 3
695 crc3 = 0;
696 #if N > 4
697 crc4 = 0;
698 #if N > 5
699 crc5 = 0;
700 #endif
701 #endif
702 #endif
703 #endif
704 #endif
705
706 /*
707 Process the first blks-1 blocks, computing the CRCs on each braid
708 independently.
709 */
710 while (--blks) {
711 /* Load the word for each braid into registers. */
712 word0 = crc0 ^ words[0];
713 #if N > 1
714 word1 = crc1 ^ words[1];
715 #if N > 2
716 word2 = crc2 ^ words[2];
717 #if N > 3
718 word3 = crc3 ^ words[3];
719 #if N > 4
720 word4 = crc4 ^ words[4];
721 #if N > 5
722 word5 = crc5 ^ words[5];
723 #endif
724 #endif
725 #endif
726 #endif
727 #endif
728 words += N;
729
730 /* Compute and update the CRC for each word. The loop should
731 get unrolled. */
732 crc0 = crc_braid_table[0][word0 & 0xff];
733 #if N > 1
734 crc1 = crc_braid_table[0][word1 & 0xff];
735 #if N > 2
736 crc2 = crc_braid_table[0][word2 & 0xff];
737 #if N > 3
738 crc3 = crc_braid_table[0][word3 & 0xff];
739 #if N > 4
740 crc4 = crc_braid_table[0][word4 & 0xff];
741 #if N > 5
742 crc5 = crc_braid_table[0][word5 & 0xff];
743 #endif
744 #endif
745 #endif
746 #endif
747 #endif
748 for (k = 1; k < W; k++) {
749 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
750 #if N > 1
751 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
752 #if N > 2
753 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
754 #if N > 3
755 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
756 #if N > 4
757 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
758 #if N > 5
759 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
760 #endif
761 #endif
762 #endif
763 #endif
764 #endif
765 }
766 }
767
768 /*
769 Process the last block, combining the CRCs of the N braids at the
770 same time.
771 */
772 crc = crc_word(crc0 ^ words[0]);
773 #if N > 1
774 crc = crc_word(crc1 ^ words[1] ^ crc);
775 #if N > 2
776 crc = crc_word(crc2 ^ words[2] ^ crc);
777 #if N > 3
778 crc = crc_word(crc3 ^ words[3] ^ crc);
779 #if N > 4
780 crc = crc_word(crc4 ^ words[4] ^ crc);
781 #if N > 5
782 crc = crc_word(crc5 ^ words[5] ^ crc);
783 #endif
784 #endif
785 #endif
786 #endif
787 #endif
788 words += N;
789 }
790 else {
791 /* Big endian. */
792
793 z_word_t crc0, word0, comb;
794 #if N > 1
795 z_word_t crc1, word1;
796 #if N > 2
797 z_word_t crc2, word2;
798 #if N > 3
799 z_word_t crc3, word3;
800 #if N > 4
801 z_word_t crc4, word4;
802 #if N > 5
803 z_word_t crc5, word5;
804 #endif
805 #endif
806 #endif
807 #endif
808 #endif
809
810 /* Initialize the CRC for each braid. */
811 crc0 = byte_swap(crc);
812 #if N > 1
813 crc1 = 0;
814 #if N > 2
815 crc2 = 0;
816 #if N > 3
817 crc3 = 0;
818 #if N > 4
819 crc4 = 0;
820 #if N > 5
821 crc5 = 0;
822 #endif
823 #endif
824 #endif
825 #endif
826 #endif
827
828 /*
829 Process the first blks-1 blocks, computing the CRCs on each braid
830 independently.
831 */
832 while (--blks) {
833 /* Load the word for each braid into registers. */
834 word0 = crc0 ^ words[0];
835 #if N > 1
836 word1 = crc1 ^ words[1];
837 #if N > 2
838 word2 = crc2 ^ words[2];
839 #if N > 3
840 word3 = crc3 ^ words[3];
841 #if N > 4
842 word4 = crc4 ^ words[4];
843 #if N > 5
844 word5 = crc5 ^ words[5];
845 #endif
846 #endif
847 #endif
848 #endif
849 #endif
850 words += N;
851
852 /* Compute and update the CRC for each word. The loop should
853 get unrolled. */
854 crc0 = crc_braid_big_table[0][word0 & 0xff];
855 #if N > 1
856 crc1 = crc_braid_big_table[0][word1 & 0xff];
857 #if N > 2
858 crc2 = crc_braid_big_table[0][word2 & 0xff];
859 #if N > 3
860 crc3 = crc_braid_big_table[0][word3 & 0xff];
861 #if N > 4
862 crc4 = crc_braid_big_table[0][word4 & 0xff];
863 #if N > 5
864 crc5 = crc_braid_big_table[0][word5 & 0xff];
865 #endif
866 #endif
867 #endif
868 #endif
869 #endif
870 for (k = 1; k < W; k++) {
871 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
872 #if N > 1
873 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
874 #if N > 2
875 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
876 #if N > 3
877 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
878 #if N > 4
879 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
880 #if N > 5
881 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
882 #endif
883 #endif
884 #endif
885 #endif
886 #endif
887 }
888 }
889
890 /*
891 Process the last block, combining the CRCs of the N braids at the
892 same time.
893 */
894 comb = crc_word_big(crc0 ^ words[0]);
895 #if N > 1
896 comb = crc_word_big(crc1 ^ words[1] ^ comb);
897 #if N > 2
898 comb = crc_word_big(crc2 ^ words[2] ^ comb);
899 #if N > 3
900 comb = crc_word_big(crc3 ^ words[3] ^ comb);
901 #if N > 4
902 comb = crc_word_big(crc4 ^ words[4] ^ comb);
903 #if N > 5
904 comb = crc_word_big(crc5 ^ words[5] ^ comb);
905 #endif
906 #endif
907 #endif
908 #endif
909 #endif
910 words += N;
911 crc = byte_swap(comb);
912 }
913
914 /*
915 Update the pointer to the remaining bytes to process.
916 */
917 buf = (unsigned char const *)words;
918 }
919
920 #endif /* W */
921
922 /* Complete the computation of the CRC on any remaining bytes. */
923 while (len >= 8) {
924 len -= 8;
925 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
926 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
927 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
928 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
929 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
930 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
931 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
932 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
933 }
934 while (len) {
935 len--;
936 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
937 }
938
939 /* Return the CRC, post-conditioned. */
940 return crc ^ 0xffffffff;
941 }
942
943 #endif
944
945 /* ========================================================================= */
crc32(uLong crc,const unsigned char FAR * buf,uInt len)946 uLong ZEXPORT crc32(uLong crc, const unsigned char FAR *buf, uInt len) {
947 #ifdef HAVE_S390X_VX
948 return crc32_z_hook(crc, buf, len);
949 #endif
950 return crc32_z(crc, buf, len);
951 }
952
953 /* ========================================================================= */
crc32_combine_gen64(z_off64_t len2)954 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
955 if (len2 < 0)
956 return 0;
957 #ifdef DYNAMIC_CRC_TABLE
958 z_once(&made, make_crc_table);
959 #endif /* DYNAMIC_CRC_TABLE */
960 return x2nmodp(len2, 3);
961 }
962
963 /* ========================================================================= */
crc32_combine_gen(z_off_t len2)964 uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
965 return crc32_combine_gen64((z_off64_t)len2);
966 }
967
968 /* ========================================================================= */
crc32_combine_op(uLong crc1,uLong crc2,uLong op)969 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
970 if (op == 0)
971 return 0;
972 return multmodp(op, crc1 & 0xffffffff) ^ (crc2 & 0xffffffff);
973 }
974
975 /* ========================================================================= */
crc32_combine64(uLong crc1,uLong crc2,z_off64_t len2)976 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
977 return crc32_combine_op(crc1, crc2, crc32_combine_gen64(len2));
978 }
979
980 /* ========================================================================= */
crc32_combine(uLong crc1,uLong crc2,z_off_t len2)981 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
982 return crc32_combine64(crc1, crc2, (z_off64_t)len2);
983 }
984