/*
 * QEMU float support macros
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/*----------------------------------------------------------------------------
| Shifts `a' right by `count' bits.  Any nonzero bits shifted off are
| "jammed" into the result by forcing its least significant bit to 1, so no
| loss of information goes unrecorded.  `count' may be arbitrarily large:
| once all 32 bits have been shifted out, the result collapses to 0 or 1
| according to whether `a' was zero or nonzero.  The result is stored at the
| location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
{
    if (count == 0) {
        *zPtr = a;
    } else if (count < 32) {
        /* The bits that fall off the bottom become a single sticky bit. */
        uint32_t sticky = (a << ((-count) & 31)) != 0;
        *zPtr = (a >> count) | sticky;
    } else {
        *zPtr = (a != 0);
    }
}
/*----------------------------------------------------------------------------
| Shifts `a' right by `count' bits.  Any nonzero bits shifted off are
| "jammed" into the result by forcing its least significant bit to 1.
| `count' may be arbitrarily large: once all 64 bits have been shifted out,
| the result collapses to 0 or 1 according to whether `a' was zero or
| nonzero.  The result is stored at the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
{
    if (count == 0) {
        *zPtr = a;
    } else if (count < 64) {
        /* The bits that fall off the bottom become a single sticky bit. */
        uint64_t sticky = (a << ((-count) & 63)) != 0;
        *zPtr = (a >> count) | sticky;
    } else {
        *zPtr = (a != 0);
    }
}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value `a0':`a1' right by 64 _plus_ `count' bits.  The
| (at most 64-bit) shifted result is stored through `z0Ptr'.  The bits
| shifted off are condensed into a 64-bit "extra" word stored through
| `z1Ptr': its most significant bit is the last bit shifted off, and its
| remaining 63 bits are all zero if and only if every other discarded bit
| was zero.  `count' may be arbitrarily large.  (Viewing `a0':`a1' as a
| fixed-point value with the binary point between the two words, the integer
| part of the shifted value goes to `z0Ptr' and a sticky summary of the
| fraction goes to `z1Ptr'.)
*----------------------------------------------------------------------------*/

static inline void
shift64ExtraRightJamming(
    uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;
    int8_t negCount = (-count) & 63;

    if (count == 0) {
        z0 = a0;
        z1 = a1;
    } else if (count < 64) {
        /* `a1' is entirely discarded: fold it into the sticky bit. */
        z0 = a0 >> count;
        z1 = (a0 << negCount) | (a1 != 0);
    } else if (count == 64) {
        z0 = 0;
        z1 = a0 | (a1 != 0);
    } else {
        /* Everything is shifted out; only a sticky summary survives. */
        z0 = 0;
        z1 = ((a0 | a1) != 0);
    }
    *z0Ptr = z0;
    *z1Ptr = z1;
}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value `a0':`a1' right by `count' bits; bits shifted off
| are simply lost.  `count' may be arbitrarily large — for counts of 128 or
| more the result is 0.  The two 64-bit halves of the result are stored
| through `z0Ptr' (high) and `z1Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
shift128Right(
    uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;
    int8_t negCount = (-count) & 63;

    if (count == 0) {
        z0 = a0;
        z1 = a1;
    } else if (count < 64) {
        z0 = a0 >> count;
        z1 = (a0 << negCount) | (a1 >> count);
    } else {
        z0 = 0;
        z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
    }
    *z0Ptr = z0;
    *z1Ptr = z1;
}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value `a0':`a1' right by `count' bits.  Any nonzero
| bits shifted off are "jammed" into the result by forcing its least
| significant bit to 1.  `count' may be arbitrarily large: once all 128 bits
| have been shifted out, the result collapses to 0 or 1 according to whether
| `a0':`a1' was zero or nonzero.  The two 64-bit halves of the result are
| stored through `z0Ptr' (high) and `z1Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
shift128RightJamming(
    uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;
    int8_t negCount = (-count) & 63;

    if (count == 0) {
        z0 = a0;
        z1 = a1;
    } else if (count < 64) {
        /* Low `count' bits of `a1' are discarded: fold into sticky. */
        uint64_t sticky = (a1 << negCount) != 0;
        z0 = a0 >> count;
        z1 = (a0 << negCount) | (a1 >> count) | sticky;
    } else {
        z0 = 0;
        if (count == 64) {
            z1 = a0 | (a1 != 0);
        } else if (count < 128) {
            z1 = (a0 >> (count & 63)) | (((a0 << negCount) | a1) != 0);
        } else {
            z1 = ((a0 | a1) != 0);
        }
    }
    *z0Ptr = z0;
    *z1Ptr = z1;
}
/*----------------------------------------------------------------------------
| Shifts the 192-bit value `a0':`a1':`a2' right by 64 _plus_ `count' bits.
| The (at most 128-bit) shifted result is stored through `z0Ptr' and
| `z1Ptr'.  The bits shifted off are condensed into a 64-bit "extra" word
| stored through `z2Ptr': its most significant bit is the last bit shifted
| off, and its remaining 63 bits are all zero if and only if every other
| discarded bit was zero.  `count' may be arbitrarily large.  (Viewing
| `a0':`a1':`a2' as a fixed-point value with the binary point between `a1'
| and `a2', the integer part of the shifted value goes to `z0Ptr'/`z1Ptr'
| and a sticky summary of the fraction goes to `z2Ptr'.)
*----------------------------------------------------------------------------*/

static inline void
shift128ExtraRightJamming(
    uint64_t a0,
    uint64_t a1,
    uint64_t a2,
    int count,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
)
{
    uint64_t z0, z1, z2;
    int8_t negCount = (-count) & 63;

    if (count == 0) {
        z0 = a0;
        z1 = a1;
        z2 = a2;
    } else {
        if (count < 64) {
            z0 = a0 >> count;
            z1 = (a0 << negCount) | (a1 >> count);
            z2 = a1 << negCount;
        } else {
            z0 = 0;
            if (count == 64) {
                z1 = a0;
                z2 = a1;
            } else {
                /* `a1' is discarded entirely: fold it into the sticky word. */
                a2 |= a1;
                if (count < 128) {
                    z1 = a0 >> (count & 63);
                    z2 = a0 << negCount;
                } else {
                    z1 = 0;
                    z2 = (count == 128) ? a0 : (a0 != 0);
                }
            }
        }
        /* Anything nonzero that was shifted past the extra word sticks. */
        z2 |= (a2 != 0);
    }
    *z0Ptr = z0;
    *z1Ptr = z1;
    *z2Ptr = z2;
}
/*----------------------------------------------------------------------------
| Shifts the 128-bit value `a0':`a1' left by `count' bits; bits shifted off
| are lost.  `count' must be less than 64.  The two 64-bit halves of the
| result are stored through `z0Ptr' (high) and `z1Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
shortShift128Left(
    uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    *z1Ptr = a1 << count;
    if (count == 0) {
        /* Avoid the undefined 64-bit shift that `a1 >> 64' would be. */
        *z0Ptr = a0;
    } else {
        *z0Ptr = (a0 << count) | (a1 >> ((-count) & 63));
    }
}

/*----------------------------------------------------------------------------
| Shifts the 192-bit value `a0':`a1':`a2' left by `count' bits; bits shifted
| off are lost.  `count' must be less than 64.  The three 64-bit pieces of
| the result are stored through `z0Ptr' (high), `z1Ptr', and `z2Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
shortShift192Left(
    uint64_t a0,
    uint64_t a1,
    uint64_t a2,
    int count,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
)
{
    uint64_t z0 = a0 << count;
    uint64_t z1 = a1 << count;
    uint64_t z2 = a2 << count;

    if (count > 0) {
        /* Carry the top bits of each lower word into the word above. */
        int8_t negCount = (-count) & 63;
        z1 |= a2 >> negCount;
        z0 |= a1 >> negCount;
    }
    *z0Ptr = z0;
    *z1Ptr = z1;
    *z2Ptr = z2;
}
/*----------------------------------------------------------------------------
| Adds the 128-bit value `a0':`a1' to the 128-bit value `b0':`b1'.  The sum
| is taken modulo 2^128 (any carry out of the top word is lost) and stored
| through `z0Ptr' (high) and `z1Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
add128(
    uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t low = a1 + b1;

    *z1Ptr = low;
    /* An unsigned sum wrapped iff it is smaller than either addend. */
    *z0Ptr = a0 + b0 + (low < a1);
}

/*----------------------------------------------------------------------------
| Adds the 192-bit value `a0':`a1':`a2' to the 192-bit value `b0':`b1':`b2'.
| The sum is taken modulo 2^192 (any carry out of the top word is lost) and
| stored through `z0Ptr' (high), `z1Ptr', and `z2Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
add192(
    uint64_t a0,
    uint64_t a1,
    uint64_t a2,
    uint64_t b0,
    uint64_t b1,
    uint64_t b2,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
)
{
    uint64_t sum2 = a2 + b2;
    int8_t carryLow = (sum2 < a2);
    uint64_t sum1 = a1 + b1;
    int8_t carryHigh = (sum1 < a1);
    uint64_t sum0 = a0 + b0;

    /* Propagate the low carry; it may itself ripple into the top word. */
    sum1 += carryLow;
    sum0 += (sum1 < carryLow);
    sum0 += carryHigh;
    *z2Ptr = sum2;
    *z1Ptr = sum1;
    *z0Ptr = sum0;
}
/*----------------------------------------------------------------------------
| Subtracts the 128-bit value `b0':`b1' from the 128-bit value `a0':`a1'.
| The difference is taken modulo 2^128 (any borrow out of the top word is
| lost) and stored through `z0Ptr' (high) and `z1Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
sub128(
    uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    *z1Ptr = a1 - b1;
    /* The low words borrow exactly when a1 < b1. */
    *z0Ptr = (a0 - b0) - (a1 < b1);
}

/*----------------------------------------------------------------------------
| Subtracts the 192-bit value `b0':`b1':`b2' from the 192-bit value
| `a0':`a1':`a2'.  The difference is taken modulo 2^192 (any borrow out of
| the top word is lost) and stored through `z0Ptr' (high), `z1Ptr', and
| `z2Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
sub192(
    uint64_t a0,
    uint64_t a1,
    uint64_t a2,
    uint64_t b0,
    uint64_t b1,
    uint64_t b2,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
)
{
    uint64_t diff2 = a2 - b2;
    int8_t borrowLow = (a2 < b2);
    uint64_t diff1 = a1 - b1;
    int8_t borrowHigh = (a1 < b1);
    uint64_t diff0 = a0 - b0;

    /* Test whether applying the low borrow creates another borrow BEFORE
     * applying it — diff1 must still hold the unadjusted value here. */
    diff0 -= (diff1 < borrowLow);
    diff1 -= borrowLow;
    diff0 -= borrowHigh;
    *z2Ptr = diff2;
    *z1Ptr = diff1;
    *z0Ptr = diff0;
}
/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain the full 128-bit product, stored through
| `z0Ptr' (high 64 bits) and `z1Ptr' (low 64 bits).
*----------------------------------------------------------------------------*/

static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint32_t aLo = a, aHi = a >> 32;
    uint32_t bLo = b, bHi = b >> 32;
    uint64_t lo, hi, mid, midB;

    /* Schoolbook 32x32 partial products with explicit carry tracking. */
    lo = (uint64_t)aLo * bLo;
    mid = (uint64_t)aLo * bHi;
    midB = (uint64_t)aHi * bLo;
    hi = (uint64_t)aHi * bHi;
    mid += midB;
    hi += ((uint64_t)(mid < midB) << 32) + (mid >> 32);
    mid <<= 32;
    lo += mid;
    hi += (lo < mid);
    *z1Ptr = lo;
    *z0Ptr = hi;
}

/*----------------------------------------------------------------------------
| Multiplies the 128-bit value `a0':`a1' by `b' to obtain a 192-bit product,
| stored through `z0Ptr' (high), `z1Ptr', and `z2Ptr' (low).
*----------------------------------------------------------------------------*/

static inline void
mul128By64To192(
    uint64_t a0,
    uint64_t a1,
    uint64_t b,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
)
{
    uint64_t hi, mid, lo, cross;

    mul64To128(a1, b, &mid, &lo);
    mul64To128(a0, b, &hi, &cross);
    /* Add the overlapping middle words, carrying into the top word. */
    mid += cross;
    hi += (mid < cross);
    *z0Ptr = hi;
    *z1Ptr = mid;
    *z2Ptr = lo;
}
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value `a0':`a1' by the 128-bit value `b0':`b1' to
| obtain a 256-bit product.  The product is broken into four 64-bit pieces
| which are stored through `z0Ptr' (most significant) down to `z3Ptr'
| (least significant).
*----------------------------------------------------------------------------*/

static inline void
mul128To256(
    uint64_t a0,
    uint64_t a1,
    uint64_t b0,
    uint64_t b1,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr,
    uint64_t *z3Ptr
)
{
    uint64_t z0, z1, z2, z3;
    uint64_t more1, more2;

    /* Four 64x64->128 partial products, accumulated with explicit carries
     * via add128; the order of accumulation matches the word weights. */
    mul64To128( a1, b1, &z2, &z3 );
    mul64To128( a1, b0, &z1, &more2 );
    add128( z1, more2, 0, z2, &z1, &z2 );
    mul64To128( a0, b0, &z0, &more1 );
    add128( z0, more1, 0, z1, &z0, &z1 );
    mul64To128( a0, b1, &more1, &more2 );
    add128( more1, more2, 0, z2, &more1, &z2 );
    add128( z0, z1, 0, more1, &z0, &z1 );
    *z3Ptr = z3;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;
}

/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by
| dividing `b' into the 128-bit value `a0':`a1'.  The divisor `b' must be at
| least 2^63.  If q is the exact quotient truncated toward zero, the value
| returned lies between q and q + 2 inclusive.  If the exact quotient is
| larger than 64 bits, the maximum 64-bit unsigned integer is returned.
|
| Uses standard C99 `ULL' integer constants rather than the project-local
| LIT64() wrapper macro (the two are equivalent; QEMU later dropped LIT64).
*----------------------------------------------------------------------------*/

static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
{
    uint64_t b0, b1;
    uint64_t rem0, rem1, term0, term1;
    uint64_t z;

    /* Quotient would exceed 64 bits: saturate. */
    if (b <= a0) {
        return 0xFFFFFFFFFFFFFFFFULL;
    }
    b0 = b >> 32;
    /* First 32-bit quotient digit, estimated from the top words. */
    z = (b0 << 32 <= a0) ? 0xFFFFFFFF00000000ULL : (a0 / b0) << 32;
    mul64To128(b, z, &term0, &term1);
    sub128(a0, a1, term0, term1, &rem0, &rem1);
    /* The estimate may be high by at most 2; correct downward. */
    while ((int64_t)rem0 < 0) {
        z -= 0x100000000ULL;
        b1 = b << 32;
        add128(rem0, rem1, b0, b1, &rem0, &rem1);
    }
    rem0 = (rem0 << 32) | (rem1 >> 32);
    /* Second 32-bit quotient digit from the updated remainder. */
    z |= (b0 << 32 <= rem0) ? 0xFFFFFFFFULL : rem0 / b0;
    return z;
}

/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 */

/*----------------------------------------------------------------------------
| Divides the 128-bit value `n1':`n0' by `d', returning the 64-bit quotient
| with any nonzero remainder OR'd into the least significant bit (a sticky
| remainder indicator, not part of the quotient value).
|
| NOTE(review): `d1 = d >> 32' is used as a divisor, so this divides by zero
| when d < 2^32.  As in GMP's __udiv_qrnnd, callers are presumably expected
| to pass a normalized divisor (high bit set) — TODO confirm against callers.
*----------------------------------------------------------------------------*/
static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
{
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    /* First quotient digit: divide the top 96 bits by d. */
    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    /* Second quotient digit: fold in the low 32 bits of n0. */
    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    /* Return remainder in LSB */
    return (q1 << 32) | q0 | (r0 != 0);
}
673 *----------------------------------------------------------------------------*/ 674 675 static inline uint32_t estimateSqrt32(int aExp, uint32_t a) 676 { 677 static const uint16_t sqrtOddAdjustments[] = { 678 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 679 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 680 }; 681 static const uint16_t sqrtEvenAdjustments[] = { 682 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 683 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 684 }; 685 int8_t index; 686 uint32_t z; 687 688 index = ( a>>27 ) & 15; 689 if ( aExp & 1 ) { 690 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; 691 z = ( ( a / z )<<14 ) + ( z<<15 ); 692 a >>= 1; 693 } 694 else { 695 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; 696 z = a / z + z; 697 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 698 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); 699 } 700 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); 701 702 } 703 704 /*---------------------------------------------------------------------------- 705 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 706 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 707 | Otherwise, returns 0. 708 *----------------------------------------------------------------------------*/ 709 710 static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 711 { 712 713 return ( a0 == b0 ) && ( a1 == b1 ); 714 715 } 716 717 /*---------------------------------------------------------------------------- 718 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 719 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 720 | Otherwise, returns 0. 
721 *----------------------------------------------------------------------------*/ 722 723 static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 724 { 725 726 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 727 728 } 729 730 /*---------------------------------------------------------------------------- 731 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 732 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 733 | returns 0. 734 *----------------------------------------------------------------------------*/ 735 736 static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 737 { 738 739 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 740 741 } 742 743 /*---------------------------------------------------------------------------- 744 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 745 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 746 | Otherwise, returns 0. 747 *----------------------------------------------------------------------------*/ 748 749 static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 750 { 751 752 return ( a0 != b0 ) || ( a1 != b1 ); 753 754 } 755