/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

.text
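
/*
 * For reference, a rough C model of what both implementations below
 * compute (a sketch only, NOT code the kernel builds): a 32-bit
 * ones'-complement accumulator whose 16-bit fold is the Internet
 * checksum of buff[0..len).  It matches the assembler versions only
 * modulo 0xffff, and it ignores the byte rotation they perform for
 * odd-aligned buffers (the roll $8 sequences):
 *
 *	unsigned int csum_partial_ref(const unsigned char *buff, int len,
 *				      unsigned int sum)
 *	{
 *		while (len > 1) {
 *			// little-endian 16-bit load, alignment-safe
 *			unsigned int w = buff[0] | (buff[1] << 8);
 *			sum += w;
 *			if (sum < w)		// end-around carry
 *				sum++;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing odd byte
 *			unsigned int b = buff[0];
 *			sum += b;
 *			if (sum < b)
 *				sum++;
 *		}
 *		return sum;
 *	}
 */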
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is a 2-byte boundary.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clear CF before the adcl loop
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testl $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %esi
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */

ENTRY(csum_partial)
	CFI_STARTPROC
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# jump into the unrolled loop: each adcl is 3 bytes
	testl %esi, %esi		# clear CF before the adcl chain
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testl $1, 12(%esp)
	jz 90f
	roll $8, %eax
90:
	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %esi
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#endif
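
/*
 * Note on the "last 1-3 bytes without jumping" tail of the PPro
 * csum_partial above: it is a branch-free mask computation.  In C
 * terms (illustrative names; the "& 3" below is performed implicitly
 * by shrl, which truncates the shift count in %cl to 5 bits):
 *
 *	unsigned int r    = len & 3;		// remainder: 1, 2 or 3
 *	unsigned int mask = 0xffffffu >> (((~r) & 3) << 3);
 *	unsigned int w    = *(const unsigned int *)p & mask;	// p = esi - 128, 4-aligned
 *	sum += w;
 *	if (sum < w)				// end-around carry
 *		sum++;
 *
 * so r == 1 keeps 0xff, r == 2 keeps 0xffff and r == 3 keeps 0xffffff
 * of the final dword, which is why no per-byte branches are needed.
 */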
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
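
/*
 * For example, SRC(movl (%esi), %ebx) expands to
 *
 *	9999:	movl (%esi), %ebx
 *		.section __ex_table, "a"
 *		.long 9999b, 6001f
 *		.previous
 *
 * i.e. an __ex_table entry pairing the address of the possibly
 * faulting instruction with a fixup address.  On a fault, the page
 * fault handler looks up the faulting EIP in __ex_table and resumes
 * at the fixup: label 6001 (source fault: report -EFAULT through
 * src_err_ptr and zero the destination) or label 6002 (destination
 * fault: report -EFAULT through dst_err_ptr), both defined in the
 * .fixup sections below.
 */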
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP 12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	subl $4,%esp
	CFI_ADJUST_CFA_OFFSET 4
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi		# clear CF before the adcl loop
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl_cfi %ebx
	CFI_RESTORE ebx
	popl_cfi %esi
	CFI_RESTORE esi
	popl_cfi %edi
	CFI_RESTORE edi
	popl_cfi %ecx			# equivalent to addl $4,%esp
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	pushl_cfi %ebx
	CFI_REL_OFFSET ebx, 0
	pushl_cfi %edi
	CFI_REL_OFFSET edi, 0
	pushl_cfi %esi
	CFI_REL_OFFSET esi, 0
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx		# jump into the unrolled loop (8 code bytes per 4 data bytes)
	testl %esi, %esi		# clear CF before the adcl chain
	jmp *%ebx
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)	# touch src cache lines; fault early on a bad page
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl_cfi %esi
	CFI_RESTORE esi
	popl_cfi %edi
	CFI_RESTORE edi
	popl_cfi %ebx
	CFI_RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
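
/*
 * For reference, a rough model of how the error pointers are meant to
 * be used by a caller (illustrative only -- the real callers are the C
 * wrappers in <asm/checksum.h>, which pass NULL for whichever side
 * cannot fault; handle() is a placeholder):
 *
 *	int src_err = 0, dst_err = 0;	// written only on a fault
 *	unsigned int csum;
 *
 *	csum = csum_partial_copy_generic(src, dst, len, sum,
 *					 &src_err, &dst_err);
 *	if (src_err)		// src faulted: dst was zero-filled
 *		handle(-EFAULT);
 *	else if (dst_err)	// dst faulted: the copy is incomplete
 *		handle(-EFAULT);
 *	// otherwise csum holds the 32-bit partial checksum of the data
 */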