/*
 * QEMU float support macros
 *
 * Derived from SoftFloat.
 */

/*============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2b.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
35 36 =============================================================================*/ 37 38 /*---------------------------------------------------------------------------- 39 | Shifts `a' right by the number of bits given in `count'. If any nonzero 40 | bits are shifted off, they are ``jammed'' into the least significant bit of 41 | the result by setting the least significant bit to 1. The value of `count' 42 | can be arbitrarily large; in particular, if `count' is greater than 32, the 43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 44 | The result is stored in the location pointed to by `zPtr'. 45 *----------------------------------------------------------------------------*/ 46 47 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 48 { 49 bits32 z; 50 51 if ( count == 0 ) { 52 z = a; 53 } 54 else if ( count < 32 ) { 55 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 56 } 57 else { 58 z = ( a != 0 ); 59 } 60 *zPtr = z; 61 62 } 63 64 /*---------------------------------------------------------------------------- 65 | Shifts `a' right by the number of bits given in `count'. If any nonzero 66 | bits are shifted off, they are ``jammed'' into the least significant bit of 67 | the result by setting the least significant bit to 1. The value of `count' 68 | can be arbitrarily large; in particular, if `count' is greater than 64, the 69 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 70 | The result is stored in the location pointed to by `zPtr'. 
71 *----------------------------------------------------------------------------*/ 72 73 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 74 { 75 bits64 z; 76 77 if ( count == 0 ) { 78 z = a; 79 } 80 else if ( count < 64 ) { 81 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 82 } 83 else { 84 z = ( a != 0 ); 85 } 86 *zPtr = z; 87 88 } 89 90 /*---------------------------------------------------------------------------- 91 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 92 | _plus_ the number of bits given in `count'. The shifted result is at most 93 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 94 | bits shifted off form a second 64-bit result as follows: The _last_ bit 95 | shifted off is the most-significant bit of the extra result, and the other 96 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ 97 | bits shifted off were all zero. This extra result is stored in the location 98 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 99 | (This routine makes more sense if `a0' and `a1' are considered to form 100 | a fixed-point value with binary point between `a0' and `a1'. This fixed- 101 | point value is shifted right by the number of bits given in `count', and 102 | the integer part of the result is returned at the location pointed to by 103 | `z0Ptr'. The fractional part of the result may be slightly corrupted as 104 | described above, and is returned at the location pointed to by `z1Ptr'.) 
105 *----------------------------------------------------------------------------*/ 106 107 INLINE void 108 shift64ExtraRightJamming( 109 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 110 { 111 bits64 z0, z1; 112 int8 negCount = ( - count ) & 63; 113 114 if ( count == 0 ) { 115 z1 = a1; 116 z0 = a0; 117 } 118 else if ( count < 64 ) { 119 z1 = ( a0<<negCount ) | ( a1 != 0 ); 120 z0 = a0>>count; 121 } 122 else { 123 if ( count == 64 ) { 124 z1 = a0 | ( a1 != 0 ); 125 } 126 else { 127 z1 = ( ( a0 | a1 ) != 0 ); 128 } 129 z0 = 0; 130 } 131 *z1Ptr = z1; 132 *z0Ptr = z0; 133 134 } 135 136 /*---------------------------------------------------------------------------- 137 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 138 | number of bits given in `count'. Any bits shifted off are lost. The value 139 | of `count' can be arbitrarily large; in particular, if `count' is greater 140 | than 128, the result will be 0. The result is broken into two 64-bit pieces 141 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 142 *----------------------------------------------------------------------------*/ 143 144 INLINE void 145 shift128Right( 146 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 147 { 148 bits64 z0, z1; 149 int8 negCount = ( - count ) & 63; 150 151 if ( count == 0 ) { 152 z1 = a1; 153 z0 = a0; 154 } 155 else if ( count < 64 ) { 156 z1 = ( a0<<negCount ) | ( a1>>count ); 157 z0 = a0>>count; 158 } 159 else { 160 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 161 z0 = 0; 162 } 163 *z1Ptr = z1; 164 *z0Ptr = z0; 165 166 } 167 168 /*---------------------------------------------------------------------------- 169 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 170 | number of bits given in `count'. If any nonzero bits are shifted off, they 171 | are ``jammed'' into the least significant bit of the result by setting the 172 | least significant bit to 1. 
The value of `count' can be arbitrarily large; 173 | in particular, if `count' is greater than 128, the result will be either 174 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 175 | nonzero. The result is broken into two 64-bit pieces which are stored at 176 | the locations pointed to by `z0Ptr' and `z1Ptr'. 177 *----------------------------------------------------------------------------*/ 178 179 INLINE void 180 shift128RightJamming( 181 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 182 { 183 bits64 z0, z1; 184 int8 negCount = ( - count ) & 63; 185 186 if ( count == 0 ) { 187 z1 = a1; 188 z0 = a0; 189 } 190 else if ( count < 64 ) { 191 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 192 z0 = a0>>count; 193 } 194 else { 195 if ( count == 64 ) { 196 z1 = a0 | ( a1 != 0 ); 197 } 198 else if ( count < 128 ) { 199 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 200 } 201 else { 202 z1 = ( ( a0 | a1 ) != 0 ); 203 } 204 z0 = 0; 205 } 206 *z1Ptr = z1; 207 *z0Ptr = z0; 208 209 } 210 211 /*---------------------------------------------------------------------------- 212 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 213 | by 64 _plus_ the number of bits given in `count'. The shifted result is 214 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are 215 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 216 | off form a third 64-bit result as follows: The _last_ bit shifted off is 217 | the most-significant bit of the extra result, and the other 63 bits of the 218 | extra result are all zero if and only if _all_but_the_last_ bits shifted off 219 | were all zero. This extra result is stored in the location pointed to by 220 | `z2Ptr'. The value of `count' can be arbitrarily large. 
221 | (This routine makes more sense if `a0', `a1', and `a2' are considered 222 | to form a fixed-point value with binary point between `a1' and `a2'. This 223 | fixed-point value is shifted right by the number of bits given in `count', 224 | and the integer part of the result is returned at the locations pointed to 225 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 226 | corrupted as described above, and is returned at the location pointed to by 227 | `z2Ptr'.) 228 *----------------------------------------------------------------------------*/ 229 230 INLINE void 231 shift128ExtraRightJamming( 232 bits64 a0, 233 bits64 a1, 234 bits64 a2, 235 int16 count, 236 bits64 *z0Ptr, 237 bits64 *z1Ptr, 238 bits64 *z2Ptr 239 ) 240 { 241 bits64 z0, z1, z2; 242 int8 negCount = ( - count ) & 63; 243 244 if ( count == 0 ) { 245 z2 = a2; 246 z1 = a1; 247 z0 = a0; 248 } 249 else { 250 if ( count < 64 ) { 251 z2 = a1<<negCount; 252 z1 = ( a0<<negCount ) | ( a1>>count ); 253 z0 = a0>>count; 254 } 255 else { 256 if ( count == 64 ) { 257 z2 = a1; 258 z1 = a0; 259 } 260 else { 261 a2 |= a1; 262 if ( count < 128 ) { 263 z2 = a0<<negCount; 264 z1 = a0>>( count & 63 ); 265 } 266 else { 267 z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 268 z1 = 0; 269 } 270 } 271 z0 = 0; 272 } 273 z2 |= ( a2 != 0 ); 274 } 275 *z2Ptr = z2; 276 *z1Ptr = z1; 277 *z0Ptr = z0; 278 279 } 280 281 /*---------------------------------------------------------------------------- 282 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 283 | number of bits given in `count'. Any bits shifted off are lost. The value 284 | of `count' must be less than 64. The result is broken into two 64-bit 285 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
286 *----------------------------------------------------------------------------*/ 287 288 INLINE void 289 shortShift128Left( 290 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 291 { 292 293 *z1Ptr = a1<<count; 294 *z0Ptr = 295 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 296 297 } 298 299 /*---------------------------------------------------------------------------- 300 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 301 | by the number of bits given in `count'. Any bits shifted off are lost. 302 | The value of `count' must be less than 64. The result is broken into three 303 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 304 | `z1Ptr', and `z2Ptr'. 305 *----------------------------------------------------------------------------*/ 306 307 INLINE void 308 shortShift192Left( 309 bits64 a0, 310 bits64 a1, 311 bits64 a2, 312 int16 count, 313 bits64 *z0Ptr, 314 bits64 *z1Ptr, 315 bits64 *z2Ptr 316 ) 317 { 318 bits64 z0, z1, z2; 319 int8 negCount; 320 321 z2 = a2<<count; 322 z1 = a1<<count; 323 z0 = a0<<count; 324 if ( 0 < count ) { 325 negCount = ( ( - count ) & 63 ); 326 z1 |= a2>>negCount; 327 z0 |= a1>>negCount; 328 } 329 *z2Ptr = z2; 330 *z1Ptr = z1; 331 *z0Ptr = z0; 332 333 } 334 335 /*---------------------------------------------------------------------------- 336 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 337 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 338 | any carry out is lost. The result is broken into two 64-bit pieces which 339 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
340 *----------------------------------------------------------------------------*/ 341 342 INLINE void 343 add128( 344 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 345 { 346 bits64 z1; 347 348 z1 = a1 + b1; 349 *z1Ptr = z1; 350 *z0Ptr = a0 + b0 + ( z1 < a1 ); 351 352 } 353 354 /*---------------------------------------------------------------------------- 355 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 356 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 357 | modulo 2^192, so any carry out is lost. The result is broken into three 358 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 359 | `z1Ptr', and `z2Ptr'. 360 *----------------------------------------------------------------------------*/ 361 362 INLINE void 363 add192( 364 bits64 a0, 365 bits64 a1, 366 bits64 a2, 367 bits64 b0, 368 bits64 b1, 369 bits64 b2, 370 bits64 *z0Ptr, 371 bits64 *z1Ptr, 372 bits64 *z2Ptr 373 ) 374 { 375 bits64 z0, z1, z2; 376 int8 carry0, carry1; 377 378 z2 = a2 + b2; 379 carry1 = ( z2 < a2 ); 380 z1 = a1 + b1; 381 carry0 = ( z1 < a1 ); 382 z0 = a0 + b0; 383 z1 += carry1; 384 z0 += ( z1 < carry1 ); 385 z0 += carry0; 386 *z2Ptr = z2; 387 *z1Ptr = z1; 388 *z0Ptr = z0; 389 390 } 391 392 /*---------------------------------------------------------------------------- 393 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 394 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 395 | 2^128, so any borrow out (carry out) is lost. The result is broken into two 396 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 397 | `z1Ptr'. 
398 *----------------------------------------------------------------------------*/ 399 400 INLINE void 401 sub128( 402 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 403 { 404 405 *z1Ptr = a1 - b1; 406 *z0Ptr = a0 - b0 - ( a1 < b1 ); 407 408 } 409 410 /*---------------------------------------------------------------------------- 411 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 412 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 413 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 414 | result is broken into three 64-bit pieces which are stored at the locations 415 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 416 *----------------------------------------------------------------------------*/ 417 418 INLINE void 419 sub192( 420 bits64 a0, 421 bits64 a1, 422 bits64 a2, 423 bits64 b0, 424 bits64 b1, 425 bits64 b2, 426 bits64 *z0Ptr, 427 bits64 *z1Ptr, 428 bits64 *z2Ptr 429 ) 430 { 431 bits64 z0, z1, z2; 432 int8 borrow0, borrow1; 433 434 z2 = a2 - b2; 435 borrow1 = ( a2 < b2 ); 436 z1 = a1 - b1; 437 borrow0 = ( a1 < b1 ); 438 z0 = a0 - b0; 439 z0 -= ( z1 < borrow1 ); 440 z1 -= borrow1; 441 z0 -= borrow0; 442 *z2Ptr = z2; 443 *z1Ptr = z1; 444 *z0Ptr = z0; 445 446 } 447 448 /*---------------------------------------------------------------------------- 449 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 450 | into two 64-bit pieces which are stored at the locations pointed to by 451 | `z0Ptr' and `z1Ptr'. 
452 *----------------------------------------------------------------------------*/ 453 454 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 455 { 456 bits32 aHigh, aLow, bHigh, bLow; 457 bits64 z0, zMiddleA, zMiddleB, z1; 458 459 aLow = a; 460 aHigh = a>>32; 461 bLow = b; 462 bHigh = b>>32; 463 z1 = ( (bits64) aLow ) * bLow; 464 zMiddleA = ( (bits64) aLow ) * bHigh; 465 zMiddleB = ( (bits64) aHigh ) * bLow; 466 z0 = ( (bits64) aHigh ) * bHigh; 467 zMiddleA += zMiddleB; 468 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 469 zMiddleA <<= 32; 470 z1 += zMiddleA; 471 z0 += ( z1 < zMiddleA ); 472 *z1Ptr = z1; 473 *z0Ptr = z0; 474 475 } 476 477 /*---------------------------------------------------------------------------- 478 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 479 | `b' to obtain a 192-bit product. The product is broken into three 64-bit 480 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 481 | `z2Ptr'. 482 *----------------------------------------------------------------------------*/ 483 484 INLINE void 485 mul128By64To192( 486 bits64 a0, 487 bits64 a1, 488 bits64 b, 489 bits64 *z0Ptr, 490 bits64 *z1Ptr, 491 bits64 *z2Ptr 492 ) 493 { 494 bits64 z0, z1, z2, more1; 495 496 mul64To128( a1, b, &z1, &z2 ); 497 mul64To128( a0, b, &z0, &more1 ); 498 add128( z0, more1, 0, z1, &z0, &z1 ); 499 *z2Ptr = z2; 500 *z1Ptr = z1; 501 *z0Ptr = z0; 502 503 } 504 505 /*---------------------------------------------------------------------------- 506 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 507 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 508 | product. The product is broken into four 64-bit pieces which are stored at 509 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
510 *----------------------------------------------------------------------------*/ 511 512 INLINE void 513 mul128To256( 514 bits64 a0, 515 bits64 a1, 516 bits64 b0, 517 bits64 b1, 518 bits64 *z0Ptr, 519 bits64 *z1Ptr, 520 bits64 *z2Ptr, 521 bits64 *z3Ptr 522 ) 523 { 524 bits64 z0, z1, z2, z3; 525 bits64 more1, more2; 526 527 mul64To128( a1, b1, &z2, &z3 ); 528 mul64To128( a1, b0, &z1, &more2 ); 529 add128( z1, more2, 0, z2, &z1, &z2 ); 530 mul64To128( a0, b0, &z0, &more1 ); 531 add128( z0, more1, 0, z1, &z0, &z1 ); 532 mul64To128( a0, b1, &more1, &more2 ); 533 add128( more1, more2, 0, z2, &more1, &z2 ); 534 add128( z0, z1, 0, more1, &z0, &z1 ); 535 *z3Ptr = z3; 536 *z2Ptr = z2; 537 *z1Ptr = z1; 538 *z0Ptr = z0; 539 540 } 541 542 /*---------------------------------------------------------------------------- 543 | Returns an approximation to the 64-bit integer quotient obtained by dividing 544 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 545 | divisor `b' must be at least 2^63. If q is the exact quotient truncated 546 | toward zero, the approximation returned lies between q and q + 2 inclusive. 547 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 548 | unsigned integer is returned. 549 *----------------------------------------------------------------------------*/ 550 551 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 552 { 553 bits64 b0, b1; 554 bits64 rem0, rem1, term0, term1; 555 bits64 z; 556 557 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 558 b0 = b>>32; 559 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 560 mul64To128( b, z, &term0, &term1 ); 561 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 562 while ( ( (sbits64) rem0 ) < 0 ) { 563 z -= LIT64( 0x100000000 ); 564 b1 = b<<32; 565 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 566 } 567 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 568 z |= ( b0<<32 <= rem0 ) ? 
0xFFFFFFFF : rem0 / b0; 569 return z; 570 571 } 572 573 /*---------------------------------------------------------------------------- 574 | Returns an approximation to the square root of the 32-bit significand given 575 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 576 | `aExp' (the least significant bit) is 1, the integer returned approximates 577 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 578 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 579 | case, the approximation returned lies strictly within +/-2 of the exact 580 | value. 581 *----------------------------------------------------------------------------*/ 582 583 static bits32 estimateSqrt32( int16 aExp, bits32 a ) 584 { 585 static const bits16 sqrtOddAdjustments[] = { 586 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 587 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 588 }; 589 static const bits16 sqrtEvenAdjustments[] = { 590 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 591 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 592 }; 593 int8 index; 594 bits32 z; 595 596 index = ( a>>27 ) & 15; 597 if ( aExp & 1 ) { 598 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; 599 z = ( ( a / z )<<14 ) + ( z<<15 ); 600 a >>= 1; 601 } 602 else { 603 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; 604 z = a / z + z; 605 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 606 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); 607 } 608 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); 609 610 } 611 612 /*---------------------------------------------------------------------------- 613 | Returns the number of leading 0 bits before the most-significant 1 bit of 614 | `a'. If `a' is zero, 32 is returned. 
615 *----------------------------------------------------------------------------*/ 616 617 static int8 countLeadingZeros32( bits32 a ) 618 { 619 static const int8 countLeadingZerosHigh[] = { 620 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 621 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 622 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 623 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 624 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 625 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 627 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 635 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 636 }; 637 int8 shiftCount; 638 639 shiftCount = 0; 640 if ( a < 0x10000 ) { 641 shiftCount += 16; 642 a <<= 16; 643 } 644 if ( a < 0x1000000 ) { 645 shiftCount += 8; 646 a <<= 8; 647 } 648 shiftCount += countLeadingZerosHigh[ a>>24 ]; 649 return shiftCount; 650 651 } 652 653 /*---------------------------------------------------------------------------- 654 | Returns the number of leading 0 bits before the most-significant 1 bit of 655 | `a'. If `a' is zero, 64 is returned. 
656 *----------------------------------------------------------------------------*/ 657 658 static int8 countLeadingZeros64( bits64 a ) 659 { 660 int8 shiftCount; 661 662 shiftCount = 0; 663 if ( a < ( (bits64) 1 )<<32 ) { 664 shiftCount += 32; 665 } 666 else { 667 a >>= 32; 668 } 669 shiftCount += countLeadingZeros32( a ); 670 return shiftCount; 671 672 } 673 674 /*---------------------------------------------------------------------------- 675 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 676 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 677 | Otherwise, returns 0. 678 *----------------------------------------------------------------------------*/ 679 680 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 681 { 682 683 return ( a0 == b0 ) && ( a1 == b1 ); 684 685 } 686 687 /*---------------------------------------------------------------------------- 688 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 689 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 690 | Otherwise, returns 0. 691 *----------------------------------------------------------------------------*/ 692 693 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 694 { 695 696 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 697 698 } 699 700 /*---------------------------------------------------------------------------- 701 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 702 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 703 | returns 0. 
704 *----------------------------------------------------------------------------*/ 705 706 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 707 { 708 709 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 710 711 } 712 713 /*---------------------------------------------------------------------------- 714 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 715 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 716 | Otherwise, returns 0. 717 *----------------------------------------------------------------------------*/ 718 719 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) 720 { 721 722 return ( a0 != b0 ) || ( a1 != b1 ); 723 724 } 725