1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 * Note: I added some stuff for use with gnupg 3 * 4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, 5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 6 * 7 * This file is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Library General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or (at your 10 * option) any later version. 11 * 12 * This file is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 15 * License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this file; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, 20 * MA 02111-1307, USA. */ 21 22 /* You have to define the following before including this file: 23 * 24 * UWtype -- An unsigned type, default type for operations (typically a "word") 25 * UHWtype -- An unsigned type, at least half the size of UWtype. 26 * UDWtype -- An unsigned type, at least twice as large a UWtype 27 * W_TYPE_SIZE -- size in bits of UWtype 28 * 29 * SItype, USItype -- Signed and unsigned 32 bit types. 30 * DItype, UDItype -- Signed and unsigned 64 bit types. 31 * 32 * On a 32 bit machine UWtype should typically be USItype; 33 * on a 64 bit machine, UWtype should typically be UDItype. 34 */ 35 36 #define __BITS4 (W_TYPE_SIZE / 4) 37 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 38 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 39 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 40 41 /* This is used to make sure no undesirable sharing between different libraries 42 that use this file takes place. */ 43 #ifndef __MPN 44 #define __MPN(x) __##x 45 #endif 46 47 /* Define auxiliary asm macros. 48 * 49 * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two 50 * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype 51 * word product in HIGH_PROD and LOW_PROD. 52 * 53 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 54 * UDWtype product. This is just a variant of umul_ppmm. 55 56 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 57 * denominator) divides a UDWtype, composed by the UWtype integers 58 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 59 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 60 * than DENOMINATOR for correct operation. If, in addition, the most 61 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol 62 * UDIV_NEEDS_NORMALIZATION is defined to 1. 63 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 64 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient 65 * is rounded towards 0. 66 * 67 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the 68 * msb to the first non-zero bit in the UWtype X. This is the number of 69 * steps X needs to be shifted left to set the msb. Undefined for X == 0, 70 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 71 * 72 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 73 * from the least significant end. 74 * 75 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 76 * high_addend_2, low_addend_2) adds two UWtype integers, composed by 77 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 78 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 79 * (i.e. carry out) is not stored anywhere, and is lost. 80 * 81 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 82 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 83 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 84 * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 85 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 86 * and is lost. 87 * 88 * If any of these macros are left undefined for a particular CPU, 89 * C macros are used. */ 90 91 /* The CPUs come in alphabetical order below. 92 * 93 * Please add support for more CPUs here, or improve the current support 94 * for the CPUs below! */ 95 96 #if defined(__GNUC__) && !defined(NO_ASM) 97 98 /* We sometimes need to clobber "cc" with gcc2, but that would not be 99 understood by gcc1. Use cpp to avoid major code duplication. */ 100 #if __GNUC__ < 2 101 #define __CLOBBER_CC 102 #define __AND_CLOBBER_CC 103 #else /* __GNUC__ >= 2 */ 104 #define __CLOBBER_CC : "cc" 105 #define __AND_CLOBBER_CC , "cc" 106 #endif /* __GNUC__ < 2 */ 107 108 /*************************************** 109 ************** A29K ***************** 110 ***************************************/ 111 #if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 112 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 113 __asm__ ("add %1,%4,%5\n" \ 114 "addc %0,%2,%3" \ 115 : "=r" ((USItype)(sh)), \ 116 "=&r" ((USItype)(sl)) \ 117 : "%r" ((USItype)(ah)), \ 118 "rI" ((USItype)(bh)), \ 119 "%r" ((USItype)(al)), \ 120 "rI" ((USItype)(bl))) 121 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 122 __asm__ ("sub %1,%4,%5\n" \ 123 "subc %0,%2,%3" \ 124 : "=r" ((USItype)(sh)), \ 125 "=&r" ((USItype)(sl)) \ 126 : "r" ((USItype)(ah)), \ 127 "rI" ((USItype)(bh)), \ 128 "r" ((USItype)(al)), \ 129 "rI" ((USItype)(bl))) 130 #define umul_ppmm(xh, xl, m0, m1) \ 131 do { \ 132 USItype __m0 = (m0), __m1 = (m1); \ 133 __asm__ ("multiplu %0,%1,%2" \ 134 : "=r" ((USItype)(xl)) \ 135 : "r" (__m0), \ 136 "r" (__m1)); \ 137 __asm__ ("multmu %0,%1,%2" \ 138 : "=r" ((USItype)(xh)) \ 139 : "r" (__m0), \ 140 "r" (__m1)); \ 141 } while (0) 142 #define udiv_qrnnd(q, r, n1, n0, d) \ 143 __asm__ ("dividu %0,%3,%4" \ 144 : "=r" ((USItype)(q)), \ 145 "=q" ((USItype)(r)) \ 146 : "1" ((USItype)(n1)), \ 147 "r" ((USItype)(n0)), \ 148 "r" ((USItype)(d))) 149 150 #define count_leading_zeros(count, x) \ 151 __asm__ ("clz %0,%1" \ 152 : "=r" ((USItype)(count)) \ 153 : "r" ((USItype)(x))) 154 #define COUNT_LEADING_ZEROS_0 32 155 #endif /* __a29k__ */ 156 157 #if defined(__alpha) && W_TYPE_SIZE == 64 158 #define umul_ppmm(ph, pl, m0, m1) \ 159 do { \ 160 UDItype __m0 = (m0), __m1 = (m1); \ 161 __asm__ ("umulh %r1,%2,%0" \ 162 : "=r" ((UDItype) ph) \ 163 : "%rJ" (__m0), \ 164 "rI" (__m1)); \ 165 (pl) = __m0 * __m1; \ 166 } while (0) 167 #define UMUL_TIME 46 168 #ifndef LONGLONG_STANDALONE 169 #define udiv_qrnnd(q, r, n1, n0, d) \ 170 do { UDItype __r; \ 171 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 172 (r) = __r; \ 173 } while (0) 174 extern UDItype __udiv_qrnnd(); 175 #define UDIV_TIME 220 176 #endif /* LONGLONG_STANDALONE */ 177 #endif /* __alpha */ 178 179 /*************************************** 180 ************** ARM ****************** 181 ***************************************/ 182 #if defined(__arm__) && W_TYPE_SIZE == 32 183 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 184 __asm__ ("adds %1, %4, %5\n" \ 185 "adc %0, %2, %3" \ 186 : "=r" ((USItype)(sh)), \ 187 "=&r" ((USItype)(sl)) \ 188 : "%r" ((USItype)(ah)), \ 189 "rI" ((USItype)(bh)), \ 190 "%r" ((USItype)(al)), \ 191 "rI" ((USItype)(bl))) 192 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 193 __asm__ ("subs %1, %4, %5\n" \ 194 "sbc %0, %2, %3" \ 195 : "=r" ((USItype)(sh)), \ 196 "=&r" ((USItype)(sl)) \ 197 : "r" ((USItype)(ah)), \ 198 "rI" ((USItype)(bh)), \ 199 "r" ((USItype)(al)), \ 200 "rI" ((USItype)(bl))) 201 #if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ 202 #define umul_ppmm(xh, xl, a, b) \ 203 __asm__ ("%@ Inlined umul_ppmm\n" \ 204 "mov %|r0, %2, lsr #16 @ AAAA\n" \ 205 "mov %|r2, %3, lsr #16 @ BBBB\n" \ 206 "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ 207 "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ 208 "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ 209 "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ 210 "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ 211 "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ 212 "adds %|r0, %1, %0 @ central sum\n" \ 213 "addcs %|r2, %|r2, #65536\n" \ 214 "adds %1, %|r1, %|r0, lsl #16\n" \ 215 "adc %0, %|r2, %|r0, lsr #16" \ 216 : "=&r" ((USItype)(xh)), \ 217 "=r" ((USItype)(xl)) \ 218 : "r" ((USItype)(a)), \ 219 "r" ((USItype)(b)) \ 220 : "r0", "r1", "r2") 221 #else 222 #define umul_ppmm(xh, xl, a, b) \ 223 __asm__ ("%@ Inlined umul_ppmm\n" \ 224 "umull %r1, %r0, %r2, %r3" \ 225 : "=&r" ((USItype)(xh)), \ 226 "=r" ((USItype)(xl)) \ 227 : "r" ((USItype)(a)), \ 228 "r" ((USItype)(b)) \ 229 : "r0", "r1") 230 #endif 231 #define UMUL_TIME 20 232 #define UDIV_TIME 100 233 #endif /* __arm__ */ 234 235 /*************************************** 236 ************** CLIPPER ************** 237 ***************************************/ 238 #if defined(__clipper__) && W_TYPE_SIZE == 32 239 #define umul_ppmm(w1, w0, u, v) \ 240 ({union {UDItype __ll; \ 241 struct {USItype __l, __h; } __i; \ 242 } __xx; \ 243 __asm__ ("mulwux %2,%0" \ 244 : "=r" (__xx.__ll) \ 245 : "%0" ((USItype)(u)), \ 246 "r" ((USItype)(v))); \ 247 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 248 #define smul_ppmm(w1, w0, u, v) \ 249 ({union {DItype __ll; \ 250 struct {SItype __l, __h; } __i; \ 251 } __xx; \ 252 __asm__ ("mulwx %2,%0" \ 253 : "=r" (__xx.__ll) \ 254 : "%0" ((SItype)(u)), \ 255 "r" ((SItype)(v))); \ 256 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 257 #define __umulsidi3(u, v) \ 258 ({UDItype __w; \ 259 __asm__ ("mulwux %2,%0" \ 260 : "=r" (__w) \ 261 : "%0" ((USItype)(u)), \ 262 "r" ((USItype)(v))); \ 263 __w; }) 264 #endif /* __clipper__ */ 265 266 /*************************************** 267 ************** GMICRO *************** 268 ***************************************/ 269 #if defined(__gmicro__) && W_TYPE_SIZE == 32 270 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 271 __asm__ ("add.w %5,%1\n" \ 272 "addx %3,%0" \ 273 : "=g" ((USItype)(sh)), \ 274 "=&g" ((USItype)(sl)) \ 275 : "%0" ((USItype)(ah)), \ 276 "g" ((USItype)(bh)), \ 277 "%1" ((USItype)(al)), \ 278 "g" ((USItype)(bl))) 279 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 280 __asm__ ("sub.w %5,%1\n" \ 281 "subx %3,%0" \ 282 : "=g" ((USItype)(sh)), \ 283 "=&g" ((USItype)(sl)) \ 284 : "0" ((USItype)(ah)), \ 285 "g" ((USItype)(bh)), \ 286 "1" ((USItype)(al)), \ 287 "g" ((USItype)(bl))) 288 #define umul_ppmm(ph, pl, m0, m1) \ 289 __asm__ ("mulx %3,%0,%1" \ 290 : "=g" ((USItype)(ph)), \ 291 "=r" ((USItype)(pl)) \ 292 : "%0" ((USItype)(m0)), \ 293 "g" ((USItype)(m1))) 294 #define udiv_qrnnd(q, r, nh, nl, d) \ 295 __asm__ ("divx %4,%0,%1" \ 296 : "=g" ((USItype)(q)), \ 297 "=r" ((USItype)(r)) \ 298 : "1" ((USItype)(nh)), \ 299 "0" ((USItype)(nl)), \ 300 "g" ((USItype)(d))) 301 #define count_leading_zeros(count, x) \ 302 __asm__ ("bsch/1 %1,%0" \ 303 : "=g" (count) \ 304 : "g" ((USItype)(x)), \ 305 "0" ((USItype)0)) 306 #endif 307 308 /*************************************** 309 ************** HPPA ***************** 310 ***************************************/ 311 #if defined(__hppa) && W_TYPE_SIZE == 32 312 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 313 __asm__ ("add %4,%5,%1\n" \ 314 "addc %2,%3,%0" \ 315 : "=r" ((USItype)(sh)), \ 316 "=&r" ((USItype)(sl)) \ 317 : "%rM" ((USItype)(ah)), \ 318 "rM" ((USItype)(bh)), \ 319 "%rM" ((USItype)(al)), \ 320 "rM" ((USItype)(bl))) 321 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 322 __asm__ ("sub %4,%5,%1\n" \ 323 "subb %2,%3,%0" \ 324 : "=r" ((USItype)(sh)), \ 325 "=&r" ((USItype)(sl)) \ 326 : "rM" ((USItype)(ah)), \ 327 "rM" ((USItype)(bh)), \ 328 "rM" ((USItype)(al)), \ 329 "rM" ((USItype)(bl))) 330 #if defined(_PA_RISC1_1) 331 #define umul_ppmm(wh, wl, u, v) \ 332 do { \ 333 union {UDItype __ll; \ 334 struct {USItype __h, __l; } __i; \ 335 } __xx; \ 336 __asm__ ("xmpyu %1,%2,%0" \ 337 : "=*f" (__xx.__ll) \ 338 : "*f" ((USItype)(u)), \ 339 "*f" ((USItype)(v))); \ 340 (wh) = __xx.__i.__h; \ 341 (wl) = __xx.__i.__l; \ 342 } while (0) 343 #define UMUL_TIME 8 344 #define UDIV_TIME 60 345 #else 346 #define UMUL_TIME 40 347 #define UDIV_TIME 80 348 #endif 349 #ifndef LONGLONG_STANDALONE 350 #define udiv_qrnnd(q, r, n1, n0, d) \ 351 do { USItype __r; \ 352 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 353 (r) = __r; \ 354 } while (0) 355 extern USItype __udiv_qrnnd(); 356 #endif /* LONGLONG_STANDALONE */ 357 #define count_leading_zeros(count, x) \ 358 do { \ 359 USItype __tmp; \ 360 __asm__ ( \ 361 "ldi 1,%0\n" \ 362 "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ 363 "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ 364 "ldo 16(%0),%0 ; Yes. Perform add.\n" \ 365 "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ 366 "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ 367 "ldo 8(%0),%0 ; Yes. Perform add.\n" \ 368 "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ 369 "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ 370 "ldo 4(%0),%0 ; Yes. Perform add.\n" \ 371 "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ 372 "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ 373 "ldo 2(%0),%0 ; Yes. Perform add.\n" \ 374 "extru %1,30,1,%1 ; Extract bit 1.\n" \ 375 "sub %0,%1,%0 ; Subtract it. " \ 376 : "=r" (count), "=r" (__tmp) : "1" (x)); \ 377 } while (0) 378 #endif /* hppa */ 379 380 /*************************************** 381 ************** I370 ***************** 382 ***************************************/ 383 #if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 384 #define umul_ppmm(xh, xl, m0, m1) \ 385 do { \ 386 union {UDItype __ll; \ 387 struct {USItype __h, __l; } __i; \ 388 } __xx; \ 389 USItype __m0 = (m0), __m1 = (m1); \ 390 __asm__ ("mr %0,%3" \ 391 : "=r" (__xx.__i.__h), \ 392 "=r" (__xx.__i.__l) \ 393 : "%1" (__m0), \ 394 "r" (__m1)); \ 395 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 396 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 397 + (((SItype) __m1 >> 31) & __m0)); \ 398 } while (0) 399 #define smul_ppmm(xh, xl, m0, m1) \ 400 do { \ 401 union {DItype __ll; \ 402 struct {USItype __h, __l; } __i; \ 403 } __xx; \ 404 __asm__ ("mr %0,%3" \ 405 : "=r" (__xx.__i.__h), \ 406 "=r" (__xx.__i.__l) \ 407 : "%1" (m0), \ 408 "r" (m1)); \ 409 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 410 } while (0) 411 #define sdiv_qrnnd(q, r, n1, n0, d) \ 412 do { \ 413 union {DItype __ll; \ 414 struct {USItype __h, __l; } __i; \ 415 } __xx; \ 416 __xx.__i.__h = n1; __xx.__i.__l = n0; \ 417 __asm__ ("dr %0,%2" \ 418 : "=r" (__xx.__ll) \ 419 : "0" (__xx.__ll), "r" (d)); \ 420 (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ 421 } while (0) 422 #endif 423 424 /*************************************** 425 ************** I386 ***************** 426 ***************************************/ 427 #undef __i386__ 428 #if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 429 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 430 __asm__ ("addl %5,%1\n" \ 431 "adcl %3,%0" \ 432 : "=r" ((USItype)(sh)), \ 433 "=&r" ((USItype)(sl)) \ 434 : "%0" ((USItype)(ah)), \ 435 "g" ((USItype)(bh)), \ 436 "%1" ((USItype)(al)), \ 437 "g" ((USItype)(bl))) 438 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 439 __asm__ ("subl %5,%1\n" \ 440 "sbbl %3,%0" \ 441 : "=r" ((USItype)(sh)), \ 442 "=&r" ((USItype)(sl)) \ 443 : "0" ((USItype)(ah)), \ 444 "g" ((USItype)(bh)), \ 445 "1" ((USItype)(al)), \ 446 "g" ((USItype)(bl))) 447 #define umul_ppmm(w1, w0, u, v) \ 448 __asm__ ("mull %3" \ 449 : "=a" ((USItype)(w0)), \ 450 "=d" ((USItype)(w1)) \ 451 : "%0" ((USItype)(u)), \ 452 "rm" ((USItype)(v))) 453 #define udiv_qrnnd(q, r, n1, n0, d) \ 454 __asm__ ("divl %4" \ 455 : "=a" ((USItype)(q)), \ 456 "=d" ((USItype)(r)) \ 457 : "0" ((USItype)(n0)), \ 458 "1" ((USItype)(n1)), \ 459 "rm" ((USItype)(d))) 460 #define count_leading_zeros(count, x) \ 461 do { \ 462 USItype __cbtmp; \ 463 __asm__ ("bsrl %1,%0" \ 464 : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ 465 (count) = __cbtmp ^ 31; \ 466 } while (0) 467 #define count_trailing_zeros(count, x) \ 468 __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) 469 #ifndef UMUL_TIME 470 #define UMUL_TIME 40 471 #endif 472 #ifndef UDIV_TIME 473 #define UDIV_TIME 40 474 #endif 475 #endif /* 80x86 */ 476 477 /*************************************** 478 ************** I860 ***************** 479 ***************************************/ 480 #if defined(__i860__) && W_TYPE_SIZE == 32 481 #define rshift_rhlc(r, h, l, c) \ 482 __asm__ ("shr %3,r0,r0\n" \ 483 "shrd %1,%2,%0" \ 484 "=r" (r) : "r" (h), "r" (l), "rn" (c)) 485 #endif /* i860 */ 486 487 /*************************************** 488 ************** I960 ***************** 489 ***************************************/ 490 #if defined(__i960__) && W_TYPE_SIZE == 32 491 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 492 __asm__ ("cmpo 1,0\n" \ 493 "addc %5,%4,%1\n" \ 494 "addc %3,%2,%0" \ 495 : "=r" ((USItype)(sh)), \ 496 "=&r" ((USItype)(sl)) \ 497 : "%dI" ((USItype)(ah)), \ 498 "dI" ((USItype)(bh)), \ 499 "%dI" ((USItype)(al)), \ 500 "dI" ((USItype)(bl))) 501 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 502 __asm__ ("cmpo 0,0\n" \ 503 "subc %5,%4,%1\n" \ 504 "subc %3,%2,%0" \ 505 : "=r" ((USItype)(sh)), \ 506 "=&r" ((USItype)(sl)) \ 507 : "dI" ((USItype)(ah)), \ 508 "dI" ((USItype)(bh)), \ 509 "dI" ((USItype)(al)), \ 510 "dI" ((USItype)(bl))) 511 #define umul_ppmm(w1, w0, u, v) \ 512 ({union {UDItype __ll; \ 513 struct {USItype __l, __h; } __i; \ 514 } __xx; \ 515 __asm__ ("emul %2,%1,%0" \ 516 : "=d" (__xx.__ll) \ 517 : "%dI" ((USItype)(u)), \ 518 "dI" ((USItype)(v))); \ 519 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 520 #define __umulsidi3(u, v) \ 521 ({UDItype __w; \ 522 __asm__ ("emul %2,%1,%0" \ 523 : "=d" (__w) \ 524 : "%dI" ((USItype)(u)), \ 525 "dI" ((USItype)(v))); \ 526 __w; }) 527 #define udiv_qrnnd(q, r, nh, nl, d) \ 528 do { \ 529 union {UDItype __ll; \ 530 struct {USItype __l, __h; } __i; \ 531 } __nn; \ 532 __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ 533 __asm__ ("ediv %d,%n,%0" \ 534 : "=d" (__rq.__ll) \ 535 : "dI" (__nn.__ll), \ 536 "dI" ((USItype)(d))); \ 537 (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ 538 } while (0) 539 #define count_leading_zeros(count, x) \ 540 do { \ 541 USItype __cbtmp; \ 542 __asm__ ("scanbit %1,%0" \ 543 : "=r" (__cbtmp) \ 544 : "r" ((USItype)(x))); \ 545 (count) = __cbtmp ^ 31; \ 546 } while (0) 547 #define COUNT_LEADING_ZEROS_0 (-32) /* sic */ 548 #if defined(__i960mx) /* what is the proper symbol to test??? */ 549 #define rshift_rhlc(r, h, l, c) \ 550 do { \ 551 union {UDItype __ll; \ 552 struct {USItype __l, __h; } __i; \ 553 } __nn; \ 554 __nn.__i.__h = (h); __nn.__i.__l = (l); \ 555 __asm__ ("shre %2,%1,%0" \ 556 : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ 557 } 558 #endif /* i960mx */ 559 #endif /* i960 */ 560 561 /*************************************** 562 ************** 68000 **************** 563 ***************************************/ 564 #if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 565 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 566 __asm__ ("add%.l %5,%1\n" \ 567 "addx%.l %3,%0" \ 568 : "=d" ((USItype)(sh)), \ 569 "=&d" ((USItype)(sl)) \ 570 : "%0" ((USItype)(ah)), \ 571 "d" ((USItype)(bh)), \ 572 "%1" ((USItype)(al)), \ 573 "g" ((USItype)(bl))) 574 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 575 __asm__ ("sub%.l %5,%1\n" \ 576 "subx%.l %3,%0" \ 577 : "=d" ((USItype)(sh)), \ 578 "=&d" ((USItype)(sl)) \ 579 : "0" ((USItype)(ah)), \ 580 "d" ((USItype)(bh)), \ 581 "1" ((USItype)(al)), \ 582 "g" ((USItype)(bl))) 583 #if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) 584 #define umul_ppmm(w1, w0, u, v) \ 585 __asm__ ("mulu%.l %3,%1:%0" \ 586 : "=d" ((USItype)(w0)), \ 587 "=d" ((USItype)(w1)) \ 588 : "%0" ((USItype)(u)), \ 589 "dmi" ((USItype)(v))) 590 #define UMUL_TIME 45 591 #define udiv_qrnnd(q, r, n1, n0, d) \ 592 __asm__ ("divu%.l %4,%1:%0" \ 593 : "=d" ((USItype)(q)), \ 594 "=d" ((USItype)(r)) \ 595 : "0" ((USItype)(n0)), \ 596 "1" ((USItype)(n1)), \ 597 "dmi" ((USItype)(d))) 598 #define UDIV_TIME 90 599 #define sdiv_qrnnd(q, r, n1, n0, d) \ 600 __asm__ ("divs%.l %4,%1:%0" \ 601 : "=d" ((USItype)(q)), \ 602 "=d" ((USItype)(r)) \ 603 : "0" ((USItype)(n0)), \ 604 "1" ((USItype)(n1)), \ 605 "dmi" ((USItype)(d))) 606 #define count_leading_zeros(count, x) \ 607 __asm__ ("bfffo %1{%b2:%b2},%0" \ 608 : "=d" ((USItype)(count)) \ 609 : "od" ((USItype)(x)), "n" (0)) 610 #define COUNT_LEADING_ZEROS_0 32 611 #else /* not mc68020 */ 612 #define umul_ppmm(xh, xl, a, b) \ 613 do { USItype __umul_tmp1, __umul_tmp2; \ 614 __asm__ ("| Inlined umul_ppmm\n" \ 615 "move%.l %5,%3\n" \ 616 "move%.l %2,%0\n" \ 617 "move%.w %3,%1\n" \ 618 "swap %3\n" \ 619 "swap %0\n" \ 620 "mulu %2,%1\n" \ 621 "mulu %3,%0\n" \ 622 "mulu %2,%3\n" \ 623 "swap %2\n" \ 624 "mulu %5,%2\n" \ 625 "add%.l %3,%2\n" \ 626 "jcc 1f\n" \ 627 "add%.l %#0x10000,%0\n" \ 628 "1: move%.l %2,%3\n" \ 629 "clr%.w %2\n" \ 630 "swap %2\n" \ 631 "swap %3\n" \ 632 "clr%.w %3\n" \ 633 "add%.l %3,%1\n" \ 634 "addx%.l %2,%0\n" \ 635 "| End inlined umul_ppmm" \ 636 : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ 637 "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ 638 : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ 639 } while (0) 640 #define UMUL_TIME 100 641 #define UDIV_TIME 400 642 #endif /* not mc68020 */ 643 #endif /* mc68000 */ 644 645 /*************************************** 646 ************** 88000 **************** 647 ***************************************/ 648 #if defined(__m88000__) && W_TYPE_SIZE == 32 649 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 650 __asm__ ("addu.co %1,%r4,%r5\n" \ 651 "addu.ci %0,%r2,%r3" \ 652 : "=r" ((USItype)(sh)), \ 653 "=&r" ((USItype)(sl)) \ 654 : "%rJ" ((USItype)(ah)), \ 655 "rJ" ((USItype)(bh)), \ 656 "%rJ" ((USItype)(al)), \ 657 "rJ" ((USItype)(bl))) 658 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 659 __asm__ ("subu.co %1,%r4,%r5\n" \ 660 "subu.ci %0,%r2,%r3" \ 661 : "=r" ((USItype)(sh)), \ 662 "=&r" ((USItype)(sl)) \ 663 : "rJ" ((USItype)(ah)), \ 664 "rJ" ((USItype)(bh)), \ 665 "rJ" ((USItype)(al)), \ 666 "rJ" ((USItype)(bl))) 667 #define count_leading_zeros(count, x) \ 668 do { \ 669 USItype __cbtmp; \ 670 __asm__ ("ff1 %0,%1" \ 671 : "=r" (__cbtmp) \ 672 : "r" ((USItype)(x))); \ 673 (count) = __cbtmp ^ 31; \ 674 } while (0) 675 #define COUNT_LEADING_ZEROS_0 63 /* sic */ 676 #if defined(__m88110__) 677 #define umul_ppmm(wh, wl, u, v) \ 678 do { \ 679 union {UDItype __ll; \ 680 struct {USItype __h, __l; } __i; \ 681 } __x; \ 682 __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ 683 (wh) = __x.__i.__h; \ 684 (wl) = __x.__i.__l; \ 685 } while (0) 686 #define udiv_qrnnd(q, r, n1, n0, d) \ 687 ({union {UDItype __ll; \ 688 struct {USItype __h, __l; } __i; \ 689 } __x, __q; \ 690 __x.__i.__h = (n1); __x.__i.__l = (n0); \ 691 __asm__ ("divu.d %0,%1,%2" \ 692 : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ 693 (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) 694 #define UMUL_TIME 5 695 #define UDIV_TIME 25 696 #else 697 #define UMUL_TIME 17 698 #define UDIV_TIME 150 699 #endif /* __m88110__ */ 700 #endif /* __m88000__ */ 701 702 /*************************************** 703 ************** MIPS ***************** 704 ***************************************/ 705 #if defined(__mips__) && W_TYPE_SIZE == 32 706 #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 707 #define umul_ppmm(w1, w0, u, v) \ 708 __asm__ ("multu %2,%3" \ 709 : "=l" ((USItype)(w0)), \ 710 "=h" ((USItype)(w1)) \ 711 : "d" ((USItype)(u)), \ 712 "d" ((USItype)(v))) 713 #else 714 #define umul_ppmm(w1, w0, u, v) \ 715 __asm__ ("multu %2,%3\n" \ 716 "mflo %0\n" \ 717 "mfhi %1" \ 718 : "=d" ((USItype)(w0)), \ 719 "=d" ((USItype)(w1)) \ 720 : "d" ((USItype)(u)), \ 721 "d" ((USItype)(v))) 722 #endif 723 #define UMUL_TIME 10 724 #define UDIV_TIME 100 725 #endif /* __mips__ */ 726 727 /*************************************** 728 ************** MIPS/64 ************** 729 ***************************************/ 730 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 731 #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 732 #define umul_ppmm(w1, w0, u, v) \ 733 __asm__ ("dmultu %2,%3" \ 734 : "=l" ((UDItype)(w0)), \ 735 "=h" ((UDItype)(w1)) \ 736 : "d" ((UDItype)(u)), \ 737 "d" ((UDItype)(v))) 738 #else 739 #define umul_ppmm(w1, w0, u, v) \ 740 __asm__ ("dmultu %2,%3\n" \ 741 "mflo %0\n" \ 742 "mfhi %1" \ 743 : "=d" ((UDItype)(w0)), \ 744 "=d" ((UDItype)(w1)) \ 745 : "d" ((UDItype)(u)), \ 746 "d" ((UDItype)(v))) 747 #endif 748 #define UMUL_TIME 20 749 #define UDIV_TIME 140 750 #endif /* __mips__ */ 751 752 /*************************************** 753 ************** 32000 **************** 754 ***************************************/ 755 #if defined(__ns32000__) && W_TYPE_SIZE == 32 756 #define umul_ppmm(w1, w0, u, v) \ 757 ({union {UDItype __ll; \ 758 struct {USItype __l, __h; } __i; \ 759 } __xx; \ 760 __asm__ ("meid %2,%0" \ 761 : "=g" (__xx.__ll) \ 762 : "%0" ((USItype)(u)), \ 763 "g" ((USItype)(v))); \ 764 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 765 #define __umulsidi3(u, v) \ 766 ({UDItype __w; \ 767 __asm__ ("meid %2,%0" \ 768 : "=g" (__w) \ 769 : "%0" ((USItype)(u)), \ 770 "g" ((USItype)(v))); \ 771 __w; }) 772 #define udiv_qrnnd(q, r, n1, n0, d) \ 773 ({union {UDItype __ll; \ 774 struct {USItype __l, __h; } __i; \ 775 } __xx; \ 776 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 777 __asm__ ("deid %2,%0" \ 778 : "=g" (__xx.__ll) \ 779 : "0" (__xx.__ll), \ 780 "g" ((USItype)(d))); \ 781 (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) 782 #define count_trailing_zeros(count, x) \ 783 do { \ 784 __asm__("ffsd %2,%0" \ 785 : "=r"((USItype) (count)) \ 786 : "0"((USItype) 0), "r"((USItype) (x))); \ 787 } while (0) 788 #endif /* __ns32000__ */ 789 790 /*************************************** 791 ************** PPC ****************** 792 ***************************************/ 793 #if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 794 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 795 do { \ 796 if (__builtin_constant_p(bh) && (bh) == 0) \ 797 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ 798 : "=r" ((USItype)(sh)), \ 799 "=&r" ((USItype)(sl)) \ 800 : "%r" ((USItype)(ah)), \ 801 "%r" ((USItype)(al)), \ 802 "rI" ((USItype)(bl))); \ 803 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 804 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ 805 : "=r" ((USItype)(sh)), \ 806 "=&r" ((USItype)(sl)) \ 807 : "%r" ((USItype)(ah)), \ 808 "%r" ((USItype)(al)), \ 809 "rI" ((USItype)(bl))); \ 810 else \ 811 __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ 812 : "=r" ((USItype)(sh)), \ 813 "=&r" ((USItype)(sl)) \ 814 : "%r" ((USItype)(ah)), \ 815 "r" ((USItype)(bh)), \ 816 "%r" ((USItype)(al)), \ 817 "rI" ((USItype)(bl))); \ 818 } while (0) 819 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 820 do { \ 821 if (__builtin_constant_p(ah) && (ah) == 0) \ 822 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ 823 : "=r" ((USItype)(sh)), \ 824 "=&r" ((USItype)(sl)) \ 825 : "r" ((USItype)(bh)), \ 826 "rI" ((USItype)(al)), \ 827 "r" ((USItype)(bl))); \ 828 else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ 829 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ 830 : "=r" ((USItype)(sh)), \ 831 "=&r" ((USItype)(sl)) \ 832 : "r" ((USItype)(bh)), \ 833 "rI" ((USItype)(al)), \ 834 "r" ((USItype)(bl))); \ 835 else if (__builtin_constant_p(bh) && (bh) == 0) \ 836 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ 837 : "=r" ((USItype)(sh)), \ 838 "=&r" ((USItype)(sl)) \ 839 : "r" ((USItype)(ah)), \ 840 "rI" ((USItype)(al)), \ 841 "r" ((USItype)(bl))); \ 842 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 843 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ 844 : "=r" ((USItype)(sh)), \ 845 "=&r" ((USItype)(sl)) \ 846 : "r" ((USItype)(ah)), \ 847 "rI" ((USItype)(al)), \ 848 "r" ((USItype)(bl))); \ 849 else \ 850 __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ 851 : "=r" ((USItype)(sh)), \ 852 "=&r" ((USItype)(sl)) \ 853 : "r" ((USItype)(ah)), \ 854 "r" ((USItype)(bh)), \ 855 "rI" ((USItype)(al)), \ 856 "r" ((USItype)(bl))); \ 857 } while (0) 858 #define count_leading_zeros(count, x) \ 859 __asm__ ("{cntlz|cntlzw} %0,%1" \ 860 : "=r" ((USItype)(count)) \ 861 : "r" ((USItype)(x))) 862 #define COUNT_LEADING_ZEROS_0 32 863 #if defined(_ARCH_PPC) 864 #define umul_ppmm(ph, pl, m0, m1) \ 865 do { \ 866 USItype __m0 = (m0), __m1 = (m1); \ 867 __asm__ ("mulhwu %0,%1,%2" \ 868 : "=r" ((USItype) ph) \ 869 : "%r" (__m0), \ 870 "r" (__m1)); \ 871 (pl) = __m0 * __m1; \ 872 } while (0) 873 #define UMUL_TIME 15 874 #define smul_ppmm(ph, pl, m0, m1) \ 875 do { \ 876 SItype __m0 = (m0), __m1 = (m1); \ 877 __asm__ ("mulhw %0,%1,%2" \ 878 : "=r" ((SItype) ph) \ 879 : "%r" (__m0), \ 880 "r" (__m1)); \ 881 (pl) = __m0 * __m1; \ 882 } while (0) 883 #define SMUL_TIME 14 884 #define UDIV_TIME 120 885 #else 886 #define umul_ppmm(xh, xl, m0, m1) \ 887 do { \ 888 USItype __m0 = (m0), __m1 = (m1); \ 889 __asm__ ("mul %0,%2,%3" \ 890 : "=r" ((USItype)(xh)), \ 891 "=q" ((USItype)(xl)) \ 892 : "r" (__m0), \ 893 "r" (__m1)); \ 894 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 895 + (((SItype) __m1 >> 31) & __m0)); \ 896 } while (0) 897 #define UMUL_TIME 8 898 #define smul_ppmm(xh, xl, m0, m1) \ 899 __asm__ ("mul %0,%2,%3" \ 900 : "=r" ((SItype)(xh)), \ 901 "=q" ((SItype)(xl)) \ 902 : "r" (m0), \ 903 "r" (m1)) 904 #define SMUL_TIME 4 905 #define sdiv_qrnnd(q, r, nh, nl, d) \ 906 __asm__ ("div %0,%2,%4" \ 907 : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ 908 : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) 909 #define UDIV_TIME 100 910 #endif 911 #endif /* Power architecture variants. */ 912 913 /*************************************** 914 ************** PYR ****************** 915 ***************************************/ 916 #if defined(__pyr__) && W_TYPE_SIZE == 32 917 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 918 __asm__ ("addw %5,%1\n" \ 919 "addwc %3,%0" \ 920 : "=r" ((USItype)(sh)), \ 921 "=&r" ((USItype)(sl)) \ 922 : "%0" ((USItype)(ah)), \ 923 "g" ((USItype)(bh)), \ 924 "%1" ((USItype)(al)), \ 925 "g" ((USItype)(bl))) 926 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 927 __asm__ ("subw %5,%1\n" \ 928 "subwb %3,%0" \ 929 : "=r" ((USItype)(sh)), \ 930 "=&r" ((USItype)(sl)) \ 931 : "0" ((USItype)(ah)), \ 932 "g" ((USItype)(bh)), \ 933 "1" ((USItype)(al)), \ 934 "g" ((USItype)(bl))) 935 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ 936 #define umul_ppmm(w1, w0, u, v) \ 937 ({union {UDItype __ll; \ 938 struct {USItype __h, __l; } __i; \ 939 } __xx; \ 940 __asm__ ("movw %1,%R0\n" \ 941 "uemul %2,%0" \ 942 : "=&r" (__xx.__ll) \ 943 : "g" ((USItype) (u)), \ 944 "g" ((USItype)(v))); \ 945 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 946 #endif /* __pyr__ */ 947 948 /*************************************** 949 ************** RT/ROMP ************** 950 ***************************************/ 951 #if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 952 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 953 __asm__ ("a %1,%5\n" \ 954 "ae %0,%3" \ 955 : "=r" ((USItype)(sh)), \ 956 "=&r" ((USItype)(sl)) \ 957 : "%0" ((USItype)(ah)), \ 958 "r" ((USItype)(bh)), \ 959 "%1" ((USItype)(al)), \ 960 "r" ((USItype)(bl))) 961 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 962 __asm__ ("s %1,%5\n" \ 963 "se %0,%3" \ 964 : "=r" ((USItype)(sh)), \ 965 "=&r" ((USItype)(sl)) \ 966 : "0" ((USItype)(ah)), \ 967 "r" ((USItype)(bh)), \ 968 "1" ((USItype)(al)), \ 969 "r" ((USItype)(bl))) 970 #define umul_ppmm(ph, pl, m0, m1) \ 971 do { \ 972 USItype __m0 = (m0), __m1 = (m1); \ 973 __asm__ ( \ 974 "s r2,r2\n" \ 975 "mts r10,%2\n" \ 976 "m r2,%3\n" \ 977 "m r2,%3\n" \ 978 "m r2,%3\n" \ 979 "m r2,%3\n" \ 980 "m r2,%3\n" \ 981 "m r2,%3\n" \ 982 "m r2,%3\n" \ 983 "m r2,%3\n" \ 984 "m r2,%3\n" \ 985 "m r2,%3\n" \ 986 "m r2,%3\n" \ 987 "m r2,%3\n" \ 988 "m r2,%3\n" \ 989 "m r2,%3\n" \ 990 "m r2,%3\n" \ 991 "m r2,%3\n" \ 992 "cas %0,r2,r0\n" \ 993 "mfs r10,%1" \ 994 : "=r" ((USItype)(ph)), \ 995 "=r" ((USItype)(pl)) \ 996 : "%r" (__m0), \ 997 "r" (__m1) \ 998 : "r2"); \ 999 (ph) += ((((SItype) __m0 >> 31) & __m1) \ 1000 + (((SItype) __m1 >> 31) & __m0)); \ 1001 } while (0) 1002 #define UMUL_TIME 20 1003 #define UDIV_TIME 200 1004 #define count_leading_zeros(count, x) \ 1005 do { \ 1006 if ((x) >= 0x10000) \ 1007 __asm__ ("clz %0,%1" \ 1008 : "=r" ((USItype)(count)) \ 1009 : "r" ((USItype)(x) >> 16)); \ 1010 else { \ 1011 __asm__ ("clz %0,%1" \ 1012 : "=r" ((USItype)(count)) \ 1013 : "r" ((USItype)(x))); \ 1014 (count) += 16; \ 1015 } \ 1016 } while (0) 1017 #endif /* RT/ROMP */ 1018 1019 /*************************************** 1020 ************** SH2 ****************** 1021 ***************************************/ 1022 #if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ 1023 && W_TYPE_SIZE == 32 1024 #define umul_ppmm(w1, w0, u, v) \ 1025 __asm__ ( \ 1026 "dmulu.l %2,%3\n" \ 1027 "sts macl,%1\n" \ 1028 "sts mach,%0" \ 1029 : "=r" ((USItype)(w1)), \ 1030 "=r" ((USItype)(w0)) \ 1031 : "r" ((USItype)(u)), \ 1032 "r" ((USItype)(v)) \ 1033 : "macl", "mach") 1034 #define UMUL_TIME 5 1035 #endif 1036 1037 /*************************************** 1038 ************** SPARC **************** 1039 ***************************************/ 1040 #if defined(__sparc__) && W_TYPE_SIZE == 32 1041 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1042 __asm__ ("addcc %r4,%5,%1\n" \ 1043 "addx %r2,%3,%0" \ 1044 : "=r" ((USItype)(sh)), \ 1045 "=&r" ((USItype)(sl)) \ 1046 : "%rJ" ((USItype)(ah)), \ 1047 "rI" ((USItype)(bh)), \ 1048 "%rJ" ((USItype)(al)), \ 1049 "rI" ((USItype)(bl)) \ 1050 __CLOBBER_CC) 1051 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1052 __asm__ ("subcc %r4,%5,%1\n" \ 1053 "subx %r2,%3,%0" \ 1054 : "=r" ((USItype)(sh)), \ 1055 "=&r" ((USItype)(sl)) \ 1056 : "rJ" ((USItype)(ah)), \ 1057 "rI" ((USItype)(bh)), \ 1058 "rJ" ((USItype)(al)), \ 1059 "rI" ((USItype)(bl)) \ 1060 __CLOBBER_CC) 1061 #if defined(__sparc_v8__) 1062 /* Don't match immediate range because, 1) it is not often useful, 1063 2) the 'I' flag thinks of the range as a 13 bit signed interval, 1064 while we want to match a 13 bit interval, sign extended to 32 bits, 1065 but INTERPRETED AS UNSIGNED. */ 1066 #define umul_ppmm(w1, w0, u, v) \ 1067 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1068 : "=r" ((USItype)(w1)), \ 1069 "=r" ((USItype)(w0)) \ 1070 : "r" ((USItype)(u)), \ 1071 "r" ((USItype)(v))) 1072 #define UMUL_TIME 5 1073 #ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ 1074 #define udiv_qrnnd(q, r, n1, n0, d) \ 1075 do { \ 1076 USItype __q; \ 1077 __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ 1078 : "=r" ((USItype)(__q)) \ 1079 : "r" ((USItype)(n1)), \ 1080 "r" ((USItype)(n0)), \ 1081 "r" ((USItype)(d))); \ 1082 (r) = (n0) - __q * (d); \ 1083 (q) = __q; \ 1084 } while (0) 1085 #define UDIV_TIME 25 1086 #endif /* SUPERSPARC */ 1087 #else /* ! __sparc_v8__ */ 1088 #if defined(__sparclite__) 1089 /* This has hardware multiply but not divide. It also has two additional 1090 instructions scan (ffs from high bit) and divscc. */ 1091 #define umul_ppmm(w1, w0, u, v) \ 1092 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1093 : "=r" ((USItype)(w1)), \ 1094 "=r" ((USItype)(w0)) \ 1095 : "r" ((USItype)(u)), \ 1096 "r" ((USItype)(v))) 1097 #define UMUL_TIME 5 1098 #define udiv_qrnnd(q, r, n1, n0, d) \ 1099 __asm__ ("! Inlined udiv_qrnnd\n" \ 1100 "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ 1101 "tst %%g0\n" \ 1102 "divscc %3,%4,%%g1\n" \ 1103 "divscc %%g1,%4,%%g1\n" \ 1104 "divscc %%g1,%4,%%g1\n" \ 1105 "divscc %%g1,%4,%%g1\n" \ 1106 "divscc %%g1,%4,%%g1\n" \ 1107 "divscc %%g1,%4,%%g1\n" \ 1108 "divscc %%g1,%4,%%g1\n" \ 1109 "divscc %%g1,%4,%%g1\n" \ 1110 "divscc %%g1,%4,%%g1\n" \ 1111 "divscc %%g1,%4,%%g1\n" \ 1112 "divscc %%g1,%4,%%g1\n" \ 1113 "divscc %%g1,%4,%%g1\n" \ 1114 "divscc %%g1,%4,%%g1\n" \ 1115 "divscc %%g1,%4,%%g1\n" \ 1116 "divscc %%g1,%4,%%g1\n" \ 1117 "divscc %%g1,%4,%%g1\n" \ 1118 "divscc %%g1,%4,%%g1\n" \ 1119 "divscc %%g1,%4,%%g1\n" \ 1120 "divscc %%g1,%4,%%g1\n" \ 1121 "divscc %%g1,%4,%%g1\n" \ 1122 "divscc %%g1,%4,%%g1\n" \ 1123 "divscc %%g1,%4,%%g1\n" \ 1124 "divscc %%g1,%4,%%g1\n" \ 1125 "divscc %%g1,%4,%%g1\n" \ 1126 "divscc %%g1,%4,%%g1\n" \ 1127 "divscc %%g1,%4,%%g1\n" \ 1128 "divscc %%g1,%4,%%g1\n" \ 1129 "divscc %%g1,%4,%%g1\n" \ 1130 "divscc %%g1,%4,%%g1\n" \ 1131 "divscc %%g1,%4,%%g1\n" \ 1132 "divscc %%g1,%4,%%g1\n" \ 1133 "divscc %%g1,%4,%0\n" \ 1134 "rd %%y,%1\n" \ 1135 "bl,a 1f\n" \ 1136 "add %1,%4,%1\n" \ 1137 "1: ! End of inline udiv_qrnnd" \ 1138 : "=r" ((USItype)(q)), \ 1139 "=r" ((USItype)(r)) \ 1140 : "r" ((USItype)(n1)), \ 1141 "r" ((USItype)(n0)), \ 1142 "rI" ((USItype)(d)) \ 1143 : "%g1" __AND_CLOBBER_CC) 1144 #define UDIV_TIME 37 1145 #define count_leading_zeros(count, x) \ 1146 __asm__ ("scan %1,0,%0" \ 1147 : "=r" ((USItype)(x)) \ 1148 : "r" ((USItype)(count))) 1149 /* Early sparclites return 63 for an argument of 0, but they warn that future 1150 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 1151 undefined. */ 1152 #endif /* __sparclite__ */ 1153 #endif /* __sparc_v8__ */ 1154 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ 1155 #ifndef umul_ppmm 1156 #define umul_ppmm(w1, w0, u, v) \ 1157 __asm__ ("! Inlined umul_ppmm\n" \ 1158 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ 1159 "sra %3,31,%%g2 ! Don't move this insn\n" \ 1160 "and %2,%%g2,%%g2 ! Don't move this insn\n" \ 1161 "andcc %%g0,0,%%g1 ! Don't move this insn\n" \ 1162 "mulscc %%g1,%3,%%g1\n" \ 1163 "mulscc %%g1,%3,%%g1\n" \ 1164 "mulscc %%g1,%3,%%g1\n" \ 1165 "mulscc %%g1,%3,%%g1\n" \ 1166 "mulscc %%g1,%3,%%g1\n" \ 1167 "mulscc %%g1,%3,%%g1\n" \ 1168 "mulscc %%g1,%3,%%g1\n" \ 1169 "mulscc %%g1,%3,%%g1\n" \ 1170 "mulscc %%g1,%3,%%g1\n" \ 1171 "mulscc %%g1,%3,%%g1\n" \ 1172 "mulscc %%g1,%3,%%g1\n" \ 1173 "mulscc %%g1,%3,%%g1\n" \ 1174 "mulscc %%g1,%3,%%g1\n" \ 1175 "mulscc %%g1,%3,%%g1\n" \ 1176 "mulscc %%g1,%3,%%g1\n" \ 1177 "mulscc %%g1,%3,%%g1\n" \ 1178 "mulscc %%g1,%3,%%g1\n" \ 1179 "mulscc %%g1,%3,%%g1\n" \ 1180 "mulscc %%g1,%3,%%g1\n" \ 1181 "mulscc %%g1,%3,%%g1\n" \ 1182 "mulscc %%g1,%3,%%g1\n" \ 1183 "mulscc %%g1,%3,%%g1\n" \ 1184 "mulscc %%g1,%3,%%g1\n" \ 1185 "mulscc %%g1,%3,%%g1\n" \ 1186 "mulscc %%g1,%3,%%g1\n" \ 1187 "mulscc %%g1,%3,%%g1\n" \ 1188 "mulscc %%g1,%3,%%g1\n" \ 1189 "mulscc %%g1,%3,%%g1\n" \ 1190 "mulscc %%g1,%3,%%g1\n" \ 1191 "mulscc %%g1,%3,%%g1\n" \ 1192 "mulscc %%g1,%3,%%g1\n" \ 1193 "mulscc %%g1,%3,%%g1\n" \ 1194 "mulscc %%g1,0,%%g1\n" \ 1195 "add %%g1,%%g2,%0\n" \ 1196 "rd %%y,%1" \ 1197 : "=r" ((USItype)(w1)), \ 1198 "=r" ((USItype)(w0)) \ 1199 : "%rI" ((USItype)(u)), \ 1200 "r" ((USItype)(v)) \ 1201 : "%g1", "%g2" __AND_CLOBBER_CC) 1202 #define UMUL_TIME 39 /* 39 instructions */ 1203 /* It's quite necessary to add this much assembler for the sparc. 1204 The default udiv_qrnnd (in C) is more than 10 times slower! */ 1205 #define udiv_qrnnd(q, r, n1, n0, d) \ 1206 __asm__ ("! Inlined udiv_qrnnd\n\t" \ 1207 "mov 32,%%g1\n\t" \ 1208 "subcc %1,%2,%%g0\n\t" \ 1209 "1: bcs 5f\n\t" \ 1210 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1211 "sub %1,%2,%1 ! this kills msb of n\n\t" \ 1212 "addx %1,%1,%1 ! so this can't give carry\n\t" \ 1213 "subcc %%g1,1,%%g1\n\t" \ 1214 "2: bne 1b\n\t" \ 1215 "subcc %1,%2,%%g0\n\t" \ 1216 "bcs 3f\n\t" \ 1217 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1218 "b 3f\n\t" \ 1219 "sub %1,%2,%1 ! this kills msb of n\n\t" \ 1220 "4: sub %1,%2,%1\n\t" \ 1221 "5: addxcc %1,%1,%1\n\t" \ 1222 "bcc 2b\n\t" \ 1223 "subcc %%g1,1,%%g1\n\t" \ 1224 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ 1225 "bne 4b\n\t" \ 1226 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ 1227 "sub %1,%2,%1\n\t" \ 1228 "3: xnor %0,0,%0\n\t" \ 1229 "! End of inline udiv_qrnnd\n" \ 1230 : "=&r" ((USItype)(q)), \ 1231 "=&r" ((USItype)(r)) \ 1232 : "r" ((USItype)(d)), \ 1233 "1" ((USItype)(n1)), \ 1234 "0" ((USItype)(n0)) : "%g1", "cc") 1235 #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ 1236 #endif 1237 #endif /* __sparc__ */ 1238 1239 /*************************************** 1240 ************** VAX ****************** 1241 ***************************************/ 1242 #if defined(__vax__) && W_TYPE_SIZE == 32 1243 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1244 __asm__ ("addl2 %5,%1\n" \ 1245 "adwc %3,%0" \ 1246 : "=g" ((USItype)(sh)), \ 1247 "=&g" ((USItype)(sl)) \ 1248 : "%0" ((USItype)(ah)), \ 1249 "g" ((USItype)(bh)), \ 1250 "%1" ((USItype)(al)), \ 1251 "g" ((USItype)(bl))) 1252 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1253 __asm__ ("subl2 %5,%1\n" \ 1254 "sbwc %3,%0" \ 1255 : "=g" ((USItype)(sh)), \ 1256 "=&g" ((USItype)(sl)) \ 1257 : "0" ((USItype)(ah)), \ 1258 "g" ((USItype)(bh)), \ 1259 "1" ((USItype)(al)), \ 1260 "g" ((USItype)(bl))) 1261 #define umul_ppmm(xh, xl, m0, m1) \ 1262 do { \ 1263 union {UDItype __ll; \ 1264 struct {USItype __l, __h; } __i; \ 1265 } __xx; \ 1266 USItype __m0 = (m0), __m1 = (m1); \ 1267 __asm__ ("emul %1,%2,$0,%0" \ 1268 : "=g" (__xx.__ll) \ 1269 : "g" (__m0), \ 1270 "g" (__m1)); \ 1271 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1272 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1273 + (((SItype) __m1 >> 31) & __m0)); \ 1274 } while (0) 1275 #define sdiv_qrnnd(q, r, n1, n0, d) \ 1276 do { \ 1277 union {DItype __ll; \ 1278 struct {SItype __l, __h; } __i; \ 1279 } __xx; \ 1280 __xx.__i.__h = n1; __xx.__i.__l = n0; \ 1281 __asm__ ("ediv %3,%2,%0,%1" \ 1282 : "=g" (q), "=g" (r) \ 1283 : "g" (__xx.__ll), "g" (d)); \ 1284 } while (0) 1285 #endif /* __vax__ */ 1286 1287 /*************************************** 1288 ************** Z8000 **************** 1289 ***************************************/ 1290 #if defined(__z8000__) && W_TYPE_SIZE == 16 1291 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1292 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1293 : "=r" ((unsigned int)(sh)), \ 1294 "=&r" ((unsigned int)(sl)) \ 1295 : "%0" ((unsigned int)(ah)), \ 1296 "r" ((unsigned int)(bh)), \ 1297 "%1" ((unsigned int)(al)), \ 1298 "rQR" ((unsigned int)(bl))) 1299 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1300 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1301 : "=r" ((unsigned int)(sh)), \ 1302 "=&r" ((unsigned int)(sl)) \ 1303 : "0" ((unsigned int)(ah)), \ 1304 "r" ((unsigned int)(bh)), \ 1305 "1" ((unsigned int)(al)), \ 1306 "rQR" ((unsigned int)(bl))) 1307 #define umul_ppmm(xh, xl, m0, m1) \ 1308 do { \ 1309 union {long int __ll; \ 1310 struct {unsigned int __h, __l; } __i; \ 1311 } __xx; \ 1312 unsigned int __m0 = (m0), __m1 = (m1); \ 1313 __asm__ ("mult %S0,%H3" \ 1314 : "=r" (__xx.__i.__h), \ 1315 "=r" (__xx.__i.__l) \ 1316 : "%1" (__m0), \ 1317 "rQR" (__m1)); \ 1318 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1319 (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1320 + (((signed int) __m1 >> 15) & __m0)); \ 1321 } while (0) 1322 #endif /* __z8000__ */ 1323 1324 #endif /* __GNUC__ */ 1325 1326 /*************************************** 1327 *********** Generic Versions ******** 1328 ***************************************/ 1329 #if !defined(umul_ppmm) && defined(__umulsidi3) 1330 #define umul_ppmm(ph, pl, m0, m1) \ 1331 { \ 1332 UDWtype __ll = __umulsidi3(m0, m1); \ 1333 ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1334 pl = (UWtype) __ll; \ 1335 } 1336 #endif 1337 1338 #if !defined(__umulsidi3) 1339 #define __umulsidi3(u, v) \ 1340 ({UWtype __hi, __lo; \ 1341 umul_ppmm(__hi, __lo, u, v); \ 1342 ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1343 #endif 1344 1345 /* If this machine has no inline assembler, use C macros. */ 1346 1347 #if !defined(add_ssaaaa) 1348 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1349 do { \ 1350 UWtype __x; \ 1351 __x = (al) + (bl); \ 1352 (sh) = (ah) + (bh) + (__x < (al)); \ 1353 (sl) = __x; \ 1354 } while (0) 1355 #endif 1356 1357 #if !defined(sub_ddmmss) 1358 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1359 do { \ 1360 UWtype __x; \ 1361 __x = (al) - (bl); \ 1362 (sh) = (ah) - (bh) - (__x > (al)); \ 1363 (sl) = __x; \ 1364 } while (0) 1365 #endif 1366 1367 #if !defined(umul_ppmm) 1368 #define umul_ppmm(w1, w0, u, v) \ 1369 do { \ 1370 UWtype __x0, __x1, __x2, __x3; \ 1371 UHWtype __ul, __vl, __uh, __vh; \ 1372 UWtype __u = (u), __v = (v); \ 1373 \ 1374 __ul = __ll_lowpart(__u); \ 1375 __uh = __ll_highpart(__u); \ 1376 __vl = __ll_lowpart(__v); \ 1377 __vh = __ll_highpart(__v); \ 1378 \ 1379 __x0 = (UWtype) __ul * __vl; \ 1380 __x1 = (UWtype) __ul * __vh; \ 1381 __x2 = (UWtype) __uh * __vl; \ 1382 __x3 = (UWtype) __uh * __vh; \ 1383 \ 1384 __x1 += __ll_highpart(__x0);/* this can't give carry */ \ 1385 __x1 += __x2; /* but this indeed can */ \ 1386 if (__x1 < __x2) /* did we get it? */ \ 1387 __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1388 \ 1389 (w1) = __x3 + __ll_highpart(__x1); \ 1390 (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ 1391 } while (0) 1392 #endif 1393 1394 #if !defined(umul_ppmm) 1395 #define smul_ppmm(w1, w0, u, v) \ 1396 do { \ 1397 UWtype __w1; \ 1398 UWtype __m0 = (u), __m1 = (v); \ 1399 umul_ppmm(__w1, w0, __m0, __m1); \ 1400 (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1401 - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1402 } while (0) 1403 #endif 1404 1405 /* Define this unconditionally, so it can be used for debugging. */ 1406 #define __udiv_qrnnd_c(q, r, n1, n0, d) \ 1407 do { \ 1408 UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ 1409 __d1 = __ll_highpart(d); \ 1410 __d0 = __ll_lowpart(d); \ 1411 \ 1412 __r1 = (n1) % __d1; \ 1413 __q1 = (n1) / __d1; \ 1414 __m = (UWtype) __q1 * __d0; \ 1415 __r1 = __r1 * __ll_B | __ll_highpart(n0); \ 1416 if (__r1 < __m) { \ 1417 __q1--, __r1 += (d); \ 1418 if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \ 1419 if (__r1 < __m) \ 1420 __q1--, __r1 += (d); \ 1421 } \ 1422 __r1 -= __m; \ 1423 \ 1424 __r0 = __r1 % __d1; \ 1425 __q0 = __r1 / __d1; \ 1426 __m = (UWtype) __q0 * __d0; \ 1427 __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ 1428 if (__r0 < __m) { \ 1429 __q0--, __r0 += (d); \ 1430 if (__r0 >= (d)) \ 1431 if (__r0 < __m) \ 1432 __q0--, __r0 += (d); \ 1433 } \ 1434 __r0 -= __m; \ 1435 \ 1436 (q) = (UWtype) __q1 * __ll_B | __q0; \ 1437 (r) = __r0; \ 1438 } while (0) 1439 1440 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through 1441 __udiv_w_sdiv (defined in libgcc or elsewhere). */ 1442 #if !defined(udiv_qrnnd) && defined(sdiv_qrnnd) 1443 #define udiv_qrnnd(q, r, nh, nl, d) \ 1444 do { \ 1445 UWtype __r; \ 1446 (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ 1447 (r) = __r; \ 1448 } while (0) 1449 #endif 1450 1451 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ 1452 #if !defined(udiv_qrnnd) 1453 #define UDIV_NEEDS_NORMALIZATION 1 1454 #define udiv_qrnnd __udiv_qrnnd_c 1455 #endif 1456 1457 #undef count_leading_zeros 1458 #if !defined(count_leading_zeros) 1459 extern 1460 #ifdef __STDC__ 1461 const 1462 #endif 1463 unsigned char __clz_tab[]; 1464 #define count_leading_zeros(count, x) \ 1465 do { \ 1466 UWtype __xr = (x); \ 1467 UWtype __a; \ 1468 \ 1469 if (W_TYPE_SIZE <= 32) { \ 1470 __a = __xr < ((UWtype) 1 << 2*__BITS4) \ 1471 ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ 1472 : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \ 1473 } \ 1474 else { \ 1475 for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ 1476 if (((__xr >> __a) & 0xff) != 0) \ 1477 break; \ 1478 } \ 1479 \ 1480 (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ 1481 } while (0) 1482 /* This version gives a well-defined value for zero. */ 1483 #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1484 #endif 1485 1486 #if !defined(count_trailing_zeros) 1487 /* Define count_trailing_zeros using count_leading_zeros. The latter might be 1488 defined in asm, but if it is not, the C version above is good enough. */ 1489 #define count_trailing_zeros(count, x) \ 1490 do { \ 1491 UWtype __ctz_x = (x); \ 1492 UWtype __ctz_c; \ 1493 count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \ 1494 (count) = W_TYPE_SIZE - 1 - __ctz_c; \ 1495 } while (0) 1496 #endif 1497 1498 #ifndef UDIV_NEEDS_NORMALIZATION 1499 #define UDIV_NEEDS_NORMALIZATION 0 1500 #endif 1501