/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 *
 * Source: /qemu/include/fpu/softfloat-macros.h
 * (revision 8d725fac63c31562cdc25e332634a6583ca7b9b5)
 */
6 
/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/
37 
38 /*----------------------------------------------------------------------------
39 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
40 | bits are shifted off, they are ``jammed'' into the least significant bit of
41 | the result by setting the least significant bit to 1.  The value of `count'
42 | can be arbitrarily large; in particular, if `count' is greater than 32, the
43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
44 | The result is stored in the location pointed to by `zPtr'.
45 *----------------------------------------------------------------------------*/
46 
47 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
48 {
49     bits32 z;
50 
51     if ( count == 0 ) {
52         z = a;
53     }
54     else if ( count < 32 ) {
55         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
56     }
57     else {
58         z = ( a != 0 );
59     }
60     *zPtr = z;
61 
62 }
63 
64 /*----------------------------------------------------------------------------
65 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
66 | bits are shifted off, they are ``jammed'' into the least significant bit of
67 | the result by setting the least significant bit to 1.  The value of `count'
68 | can be arbitrarily large; in particular, if `count' is greater than 64, the
69 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
70 | The result is stored in the location pointed to by `zPtr'.
71 *----------------------------------------------------------------------------*/
72 
73 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
74 {
75     bits64 z;
76 
77     if ( count == 0 ) {
78         z = a;
79     }
80     else if ( count < 64 ) {
81         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
82     }
83     else {
84         z = ( a != 0 );
85     }
86     *zPtr = z;
87 
88 }
89 
90 /*----------------------------------------------------------------------------
91 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
92 | _plus_ the number of bits given in `count'.  The shifted result is at most
93 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
94 | bits shifted off form a second 64-bit result as follows:  The _last_ bit
95 | shifted off is the most-significant bit of the extra result, and the other
96 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
97 | bits shifted off were all zero.  This extra result is stored in the location
98 | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
99 |     (This routine makes more sense if `a0' and `a1' are considered to form
100 | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
101 | point value is shifted right by the number of bits given in `count', and
102 | the integer part of the result is returned at the location pointed to by
103 | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
104 | described above, and is returned at the location pointed to by `z1Ptr'.)
105 *----------------------------------------------------------------------------*/
106 
107 INLINE void
108  shift64ExtraRightJamming(
109      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
110 {
111     bits64 z0, z1;
112     int8 negCount = ( - count ) & 63;
113 
114     if ( count == 0 ) {
115         z1 = a1;
116         z0 = a0;
117     }
118     else if ( count < 64 ) {
119         z1 = ( a0<<negCount ) | ( a1 != 0 );
120         z0 = a0>>count;
121     }
122     else {
123         if ( count == 64 ) {
124             z1 = a0 | ( a1 != 0 );
125         }
126         else {
127             z1 = ( ( a0 | a1 ) != 0 );
128         }
129         z0 = 0;
130     }
131     *z1Ptr = z1;
132     *z0Ptr = z0;
133 
134 }
135 
136 /*----------------------------------------------------------------------------
137 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138 | number of bits given in `count'.  Any bits shifted off are lost.  The value
139 | of `count' can be arbitrarily large; in particular, if `count' is greater
140 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
141 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142 *----------------------------------------------------------------------------*/
143 
144 INLINE void
145  shift128Right(
146      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
147 {
148     bits64 z0, z1;
149     int8 negCount = ( - count ) & 63;
150 
151     if ( count == 0 ) {
152         z1 = a1;
153         z0 = a0;
154     }
155     else if ( count < 64 ) {
156         z1 = ( a0<<negCount ) | ( a1>>count );
157         z0 = a0>>count;
158     }
159     else {
160         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
161         z0 = 0;
162     }
163     *z1Ptr = z1;
164     *z0Ptr = z0;
165 
166 }
167 
168 /*----------------------------------------------------------------------------
169 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170 | number of bits given in `count'.  If any nonzero bits are shifted off, they
171 | are ``jammed'' into the least significant bit of the result by setting the
172 | least significant bit to 1.  The value of `count' can be arbitrarily large;
173 | in particular, if `count' is greater than 128, the result will be either
174 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175 | nonzero.  The result is broken into two 64-bit pieces which are stored at
176 | the locations pointed to by `z0Ptr' and `z1Ptr'.
177 *----------------------------------------------------------------------------*/
178 
179 INLINE void
180  shift128RightJamming(
181      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
182 {
183     bits64 z0, z1;
184     int8 negCount = ( - count ) & 63;
185 
186     if ( count == 0 ) {
187         z1 = a1;
188         z0 = a0;
189     }
190     else if ( count < 64 ) {
191         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
192         z0 = a0>>count;
193     }
194     else {
195         if ( count == 64 ) {
196             z1 = a0 | ( a1 != 0 );
197         }
198         else if ( count < 128 ) {
199             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
200         }
201         else {
202             z1 = ( ( a0 | a1 ) != 0 );
203         }
204         z0 = 0;
205     }
206     *z1Ptr = z1;
207     *z0Ptr = z0;
208 
209 }
210 
211 /*----------------------------------------------------------------------------
212 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
213 | by 64 _plus_ the number of bits given in `count'.  The shifted result is
214 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
215 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
216 | off form a third 64-bit result as follows:  The _last_ bit shifted off is
217 | the most-significant bit of the extra result, and the other 63 bits of the
218 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
219 | were all zero.  This extra result is stored in the location pointed to by
220 | `z2Ptr'.  The value of `count' can be arbitrarily large.
221 |     (This routine makes more sense if `a0', `a1', and `a2' are considered
222 | to form a fixed-point value with binary point between `a1' and `a2'.  This
223 | fixed-point value is shifted right by the number of bits given in `count',
224 | and the integer part of the result is returned at the locations pointed to
225 | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
226 | corrupted as described above, and is returned at the location pointed to by
227 | `z2Ptr'.)
228 *----------------------------------------------------------------------------*/
229 
230 INLINE void
231  shift128ExtraRightJamming(
232      bits64 a0,
233      bits64 a1,
234      bits64 a2,
235      int16 count,
236      bits64 *z0Ptr,
237      bits64 *z1Ptr,
238      bits64 *z2Ptr
239  )
240 {
241     bits64 z0, z1, z2;
242     int8 negCount = ( - count ) & 63;
243 
244     if ( count == 0 ) {
245         z2 = a2;
246         z1 = a1;
247         z0 = a0;
248     }
249     else {
250         if ( count < 64 ) {
251             z2 = a1<<negCount;
252             z1 = ( a0<<negCount ) | ( a1>>count );
253             z0 = a0>>count;
254         }
255         else {
256             if ( count == 64 ) {
257                 z2 = a1;
258                 z1 = a0;
259             }
260             else {
261                 a2 |= a1;
262                 if ( count < 128 ) {
263                     z2 = a0<<negCount;
264                     z1 = a0>>( count & 63 );
265                 }
266                 else {
267                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
268                     z1 = 0;
269                 }
270             }
271             z0 = 0;
272         }
273         z2 |= ( a2 != 0 );
274     }
275     *z2Ptr = z2;
276     *z1Ptr = z1;
277     *z0Ptr = z0;
278 
279 }
280 
281 /*----------------------------------------------------------------------------
282 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283 | number of bits given in `count'.  Any bits shifted off are lost.  The value
284 | of `count' must be less than 64.  The result is broken into two 64-bit
285 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286 *----------------------------------------------------------------------------*/
287 
288 INLINE void
289  shortShift128Left(
290      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291 {
292 
293     *z1Ptr = a1<<count;
294     *z0Ptr =
295         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296 
297 }
298 
299 /*----------------------------------------------------------------------------
300 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
301 | by the number of bits given in `count'.  Any bits shifted off are lost.
302 | The value of `count' must be less than 64.  The result is broken into three
303 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
304 | `z1Ptr', and `z2Ptr'.
305 *----------------------------------------------------------------------------*/
306 
307 INLINE void
308  shortShift192Left(
309      bits64 a0,
310      bits64 a1,
311      bits64 a2,
312      int16 count,
313      bits64 *z0Ptr,
314      bits64 *z1Ptr,
315      bits64 *z2Ptr
316  )
317 {
318     bits64 z0, z1, z2;
319     int8 negCount;
320 
321     z2 = a2<<count;
322     z1 = a1<<count;
323     z0 = a0<<count;
324     if ( 0 < count ) {
325         negCount = ( ( - count ) & 63 );
326         z1 |= a2>>negCount;
327         z0 |= a1>>negCount;
328     }
329     *z2Ptr = z2;
330     *z1Ptr = z1;
331     *z0Ptr = z0;
332 
333 }
334 
335 /*----------------------------------------------------------------------------
336 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
337 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
338 | any carry out is lost.  The result is broken into two 64-bit pieces which
339 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
340 *----------------------------------------------------------------------------*/
341 
342 INLINE void
343  add128(
344      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
345 {
346     bits64 z1;
347 
348     z1 = a1 + b1;
349     *z1Ptr = z1;
350     *z0Ptr = a0 + b0 + ( z1 < a1 );
351 
352 }
353 
354 /*----------------------------------------------------------------------------
355 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
356 | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
357 | modulo 2^192, so any carry out is lost.  The result is broken into three
358 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
359 | `z1Ptr', and `z2Ptr'.
360 *----------------------------------------------------------------------------*/
361 
362 INLINE void
363  add192(
364      bits64 a0,
365      bits64 a1,
366      bits64 a2,
367      bits64 b0,
368      bits64 b1,
369      bits64 b2,
370      bits64 *z0Ptr,
371      bits64 *z1Ptr,
372      bits64 *z2Ptr
373  )
374 {
375     bits64 z0, z1, z2;
376     int8 carry0, carry1;
377 
378     z2 = a2 + b2;
379     carry1 = ( z2 < a2 );
380     z1 = a1 + b1;
381     carry0 = ( z1 < a1 );
382     z0 = a0 + b0;
383     z1 += carry1;
384     z0 += ( z1 < carry1 );
385     z0 += carry0;
386     *z2Ptr = z2;
387     *z1Ptr = z1;
388     *z0Ptr = z0;
389 
390 }
391 
392 /*----------------------------------------------------------------------------
393 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
394 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
395 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
396 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
397 | `z1Ptr'.
398 *----------------------------------------------------------------------------*/
399 
400 INLINE void
401  sub128(
402      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
403 {
404 
405     *z1Ptr = a1 - b1;
406     *z0Ptr = a0 - b0 - ( a1 < b1 );
407 
408 }
409 
410 /*----------------------------------------------------------------------------
411 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
412 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
413 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
414 | result is broken into three 64-bit pieces which are stored at the locations
415 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
416 *----------------------------------------------------------------------------*/
417 
418 INLINE void
419  sub192(
420      bits64 a0,
421      bits64 a1,
422      bits64 a2,
423      bits64 b0,
424      bits64 b1,
425      bits64 b2,
426      bits64 *z0Ptr,
427      bits64 *z1Ptr,
428      bits64 *z2Ptr
429  )
430 {
431     bits64 z0, z1, z2;
432     int8 borrow0, borrow1;
433 
434     z2 = a2 - b2;
435     borrow1 = ( a2 < b2 );
436     z1 = a1 - b1;
437     borrow0 = ( a1 < b1 );
438     z0 = a0 - b0;
439     z0 -= ( z1 < borrow1 );
440     z1 -= borrow1;
441     z0 -= borrow0;
442     *z2Ptr = z2;
443     *z1Ptr = z1;
444     *z0Ptr = z0;
445 
446 }
447 
448 /*----------------------------------------------------------------------------
449 | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
450 | into two 64-bit pieces which are stored at the locations pointed to by
451 | `z0Ptr' and `z1Ptr'.
452 *----------------------------------------------------------------------------*/
453 
454 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
455 {
456     bits32 aHigh, aLow, bHigh, bLow;
457     bits64 z0, zMiddleA, zMiddleB, z1;
458 
459     aLow = a;
460     aHigh = a>>32;
461     bLow = b;
462     bHigh = b>>32;
463     z1 = ( (bits64) aLow ) * bLow;
464     zMiddleA = ( (bits64) aLow ) * bHigh;
465     zMiddleB = ( (bits64) aHigh ) * bLow;
466     z0 = ( (bits64) aHigh ) * bHigh;
467     zMiddleA += zMiddleB;
468     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
469     zMiddleA <<= 32;
470     z1 += zMiddleA;
471     z0 += ( z1 < zMiddleA );
472     *z1Ptr = z1;
473     *z0Ptr = z0;
474 
475 }
476 
477 /*----------------------------------------------------------------------------
478 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
479 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
480 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
481 | `z2Ptr'.
482 *----------------------------------------------------------------------------*/
483 
484 INLINE void
485  mul128By64To192(
486      bits64 a0,
487      bits64 a1,
488      bits64 b,
489      bits64 *z0Ptr,
490      bits64 *z1Ptr,
491      bits64 *z2Ptr
492  )
493 {
494     bits64 z0, z1, z2, more1;
495 
496     mul64To128( a1, b, &z1, &z2 );
497     mul64To128( a0, b, &z0, &more1 );
498     add128( z0, more1, 0, z1, &z0, &z1 );
499     *z2Ptr = z2;
500     *z1Ptr = z1;
501     *z0Ptr = z0;
502 
503 }
504 
505 /*----------------------------------------------------------------------------
506 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
507 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
508 | product.  The product is broken into four 64-bit pieces which are stored at
509 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
510 *----------------------------------------------------------------------------*/
511 
512 INLINE void
513  mul128To256(
514      bits64 a0,
515      bits64 a1,
516      bits64 b0,
517      bits64 b1,
518      bits64 *z0Ptr,
519      bits64 *z1Ptr,
520      bits64 *z2Ptr,
521      bits64 *z3Ptr
522  )
523 {
524     bits64 z0, z1, z2, z3;
525     bits64 more1, more2;
526 
527     mul64To128( a1, b1, &z2, &z3 );
528     mul64To128( a1, b0, &z1, &more2 );
529     add128( z1, more2, 0, z2, &z1, &z2 );
530     mul64To128( a0, b0, &z0, &more1 );
531     add128( z0, more1, 0, z1, &z0, &z1 );
532     mul64To128( a0, b1, &more1, &more2 );
533     add128( more1, more2, 0, z2, &more1, &z2 );
534     add128( z0, z1, 0, more1, &z0, &z1 );
535     *z3Ptr = z3;
536     *z2Ptr = z2;
537     *z1Ptr = z1;
538     *z0Ptr = z0;
539 
540 }
541 
542 /*----------------------------------------------------------------------------
543 | Returns an approximation to the 64-bit integer quotient obtained by dividing
544 | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
545 | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
546 | toward zero, the approximation returned lies between q and q + 2 inclusive.
547 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
548 | unsigned integer is returned.
549 *----------------------------------------------------------------------------*/
550 
551 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
552 {
553     bits64 b0, b1;
554     bits64 rem0, rem1, term0, term1;
555     bits64 z;
556 
557     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
558     b0 = b>>32;
559     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
560     mul64To128( b, z, &term0, &term1 );
561     sub128( a0, a1, term0, term1, &rem0, &rem1 );
562     while ( ( (sbits64) rem0 ) < 0 ) {
563         z -= LIT64( 0x100000000 );
564         b1 = b<<32;
565         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
566     }
567     rem0 = ( rem0<<32 ) | ( rem1>>32 );
568     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
569     return z;
570 
571 }
572 
573 /*----------------------------------------------------------------------------
574 | Returns an approximation to the square root of the 32-bit significand given
575 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
576 | `aExp' (the least significant bit) is 1, the integer returned approximates
577 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
578 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
579 | case, the approximation returned lies strictly within +/-2 of the exact
580 | value.
581 *----------------------------------------------------------------------------*/
582 
583 static bits32 estimateSqrt32( int16 aExp, bits32 a )
584 {
585     static const bits16 sqrtOddAdjustments[] = {
586         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
587         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
588     };
589     static const bits16 sqrtEvenAdjustments[] = {
590         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
591         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
592     };
593     int8 index;
594     bits32 z;
595 
596     index = ( a>>27 ) & 15;
597     if ( aExp & 1 ) {
598         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
599         z = ( ( a / z )<<14 ) + ( z<<15 );
600         a >>= 1;
601     }
602     else {
603         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
604         z = a / z + z;
605         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
606         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
607     }
608     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
609 
610 }
611 
612 /*----------------------------------------------------------------------------
613 | Returns the number of leading 0 bits before the most-significant 1 bit of
614 | `a'.  If `a' is zero, 32 is returned.
615 *----------------------------------------------------------------------------*/
616 
617 static int8 countLeadingZeros32( bits32 a )
618 {
619     static const int8 countLeadingZerosHigh[] = {
620         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
621         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
622         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
623         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
624         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
625         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
626         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
627         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
628         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
632         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
633         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
635         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
636     };
637     int8 shiftCount;
638 
639     shiftCount = 0;
640     if ( a < 0x10000 ) {
641         shiftCount += 16;
642         a <<= 16;
643     }
644     if ( a < 0x1000000 ) {
645         shiftCount += 8;
646         a <<= 8;
647     }
648     shiftCount += countLeadingZerosHigh[ a>>24 ];
649     return shiftCount;
650 
651 }
652 
653 /*----------------------------------------------------------------------------
654 | Returns the number of leading 0 bits before the most-significant 1 bit of
655 | `a'.  If `a' is zero, 64 is returned.
656 *----------------------------------------------------------------------------*/
657 
658 static int8 countLeadingZeros64( bits64 a )
659 {
660     int8 shiftCount;
661 
662     shiftCount = 0;
663     if ( a < ( (bits64) 1 )<<32 ) {
664         shiftCount += 32;
665     }
666     else {
667         a >>= 32;
668     }
669     shiftCount += countLeadingZeros32( a );
670     return shiftCount;
671 
672 }
673 
674 /*----------------------------------------------------------------------------
675 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
676 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
677 | Otherwise, returns 0.
678 *----------------------------------------------------------------------------*/
679 
680 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
681 {
682 
683     return ( a0 == b0 ) && ( a1 == b1 );
684 
685 }
686 
687 /*----------------------------------------------------------------------------
688 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
689 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
690 | Otherwise, returns 0.
691 *----------------------------------------------------------------------------*/
692 
693 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
694 {
695 
696     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
697 
698 }
699 
700 /*----------------------------------------------------------------------------
701 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
702 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
703 | returns 0.
704 *----------------------------------------------------------------------------*/
705 
706 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707 {
708 
709     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
710 
711 }
712 
713 /*----------------------------------------------------------------------------
714 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
715 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.
716 | Otherwise, returns 0.
717 *----------------------------------------------------------------------------*/
718 
719 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
720 {
721 
722     return ( a0 != b0 ) || ( a1 != b1 );
723 
724 }
725