/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *                           handling.
 *              Andi Kleen,  add zeroing on error
 *                           converted to pure assembler
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

/*
 * Syntax:  Intel operand order (op dst, src), run through the C preprocessor.
 * Target:  32-bit x86.
 * ABI:     arguments are read from the stack (after the two pushes below they
 *          sit at [esp + 12], [esp + 16], [esp + 20]); the caller removes them.
 * In:      buff, len, sum as in the prototype above.
 * Out:     eax = 32-bit ones'-complement running sum (not folded to 16 bits).
 *          If buff is odd the accumulator is rotated by 8 bits on entry and
 *          again on exit, so the folded 16-bit value is unaffected by the
 *          starting alignment.
 * Saved:   esi, ebx (pushed/popped).
 * Clobb:   ecx, edx, flags.
 */

#include <asm.inc>

.code
.align 4
PUBLIC _csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */
_csum_partial:
        push esi
        push ebx
        mov eax, [esp + 20]             // Function arg: unsigned int sum
        mov ecx, [esp + 16]             // Function arg: int len
        mov esi, [esp + 12]             // Function arg: unsigned char *buff
        test esi, 3                     // Check alignment.
        jz m2                           // Jump if alignment is ok.
        test esi, 1                     // Check alignment.
        jz l10                          // Jump if alignment is boundary of 2bytes.

        // buf is odd
        dec ecx
        jl l8                           // len was 0: return sum unchanged.
        movzx ebx, byte ptr [esi]
        add eax, ebx
        adc eax, 0                      // Fold the carry back in before rotating.
        rol eax, 8                      // Undone by the matching rol at l7.
        inc esi
        test esi, 2
        jz m2
l10:
        sub ecx, 2                      // Alignment uses up two bytes.
        jae m1                          // Jump if we had at least two bytes.
        add ecx, 2                      // ecx was < 2.  Deal with it.
        jmp l4
m1:     mov bx, [esi]
        add esi, 2
        add ax, bx
        adc eax, 0
m2:
        mov edx, ecx                    // edx = bytes remaining (kept for the tails).
        shr ecx, 5                      // ecx = number of 32-byte chunks.
        jz l2
        test esi, esi                   // Clears CF for the first adc.
l1:     mov ebx, [esi]
        adc eax, ebx
        mov ebx, [esi + 4]
        adc eax, ebx
        mov ebx, [esi + 8]
        adc eax, ebx
        mov ebx, [esi + 12]
        adc eax, ebx
        mov ebx, [esi + 16]
        adc eax, ebx
        mov ebx, [esi + 20]
        adc eax, ebx
        mov ebx, [esi + 24]
        adc eax, ebx
        mov ebx, [esi + 28]
        adc eax, ebx
        lea esi, [esi + 32]             // lea and dec leave CF alone, so the
        dec ecx                         // carry chain continues across iterations.
        jne l1
        adc eax, 0
l2:     mov ecx, edx
        and edx, HEX(1c)                // Remaining whole dwords, times 4.
        je l4
        shr edx, 2                      // This clears CF
l3:     adc eax, [esi]
        lea esi, [esi + 4]
        dec edx
        jne l3
        adc eax, 0
l4:     and ecx, 3                      // 0..3 trailing bytes.
        jz l7
        cmp ecx, 2
        jb l5                           // Exactly one byte.
        mov cx, [esi]                   // mov/lea keep the flags from cmp.
        lea esi, [esi + 2]
        je l6                           // Exactly two bytes.
        shl ecx, 16                     // Three bytes: word goes high, byte low.
l5:     mov cl, [esi]
l6:     add eax, ecx
        adc eax, 0
l7:
        test dword ptr [esp + 12], 1    // Was the original buff odd?
        jz l8
        rol eax, 8
l8:
        pop ebx
        pop esi
        ret

#else

/* Version for PentiumII/PPro */

_csum_partial:
        push esi
        push ebx
        mov eax, [esp + 20]             // Function arg: unsigned int sum
        mov ecx, [esp + 16]             // Function arg: int len
        mov esi, [esp + 12]             // Function arg: const unsigned char *buf

        test esi, 3
        jnz l25
l10:
        mov edx, ecx                    // edx = bytes remaining (kept for the tail).
        mov ebx, ecx
        and ebx, HEX(7c)                // Bytes in the leading partial 128-byte block.
        shr ecx, 7                      // ecx = number of full 128-byte blocks.
        add esi, ebx
        shr ebx, 2                      // Dwords in the partial block.
        neg ebx
        // Enter the unrolled chain below so that exactly that many dwords are
        // summed.  The * 3 relies on every add/adc in the chain (register,
        // [esi + disp8]) encoding to 3 bytes.
        lea ebx, l45[ebx + ebx * 2]
        test esi, esi                   // Clears CF for the first adc.
        jmp ebx                         // ebx holds a code address, not a pointer to one.

        // Handle 2-byte-aligned regions
l20:    add ax, [esi]
        lea esi, [esi + 2]
        adc eax, 0
        jmp l10
l25:
        test esi, 1
        jz l30
        // buf is odd
        dec ecx
        jl l90                          // len was 0: return sum unchanged.
        movzx ebx, byte ptr [esi]
        add eax, ebx
        adc eax, 0
        rol eax, 8                      // Undone by the matching rol at l80.
        inc esi
        test esi, 2
        jz l10

l30:    sub ecx, 2
        ja l20                          // More than two bytes left.
        je l32                          // Exactly two bytes left.
        add ecx, 2
        jz l80                          // Nothing left.
        movzx ebx, byte ptr [esi]       // csumming 1 byte, 2-aligned
        add eax, ebx
        adc eax, 0
        jmp l80
l32:
        add ax, [esi]                   // csumming 2 bytes, 2-aligned
        adc eax, 0
        jmp l80

l40:
        add eax, [esi -128]
        adc eax, [esi -124]
        adc eax, [esi -120]
        adc eax, [esi -116]
        adc eax, [esi -112]
        adc eax, [esi -108]
        adc eax, [esi -104]
        adc eax, [esi -100]
        adc eax, [esi -96]
        adc eax, [esi -92]
        adc eax, [esi -88]
        adc eax, [esi -84]
        adc eax, [esi -80]
        adc eax, [esi -76]
        adc eax, [esi -72]
        adc eax, [esi -68]
        adc eax, [esi -64]
        adc eax, [esi -60]
        adc eax, [esi -56]
        adc eax, [esi -52]
        adc eax, [esi -48]
        adc eax, [esi -44]
        adc eax, [esi -40]
        adc eax, [esi -36]
        adc eax, [esi -32]
        adc eax, [esi -28]
        adc eax, [esi -24]
        adc eax, [esi -20]
        adc eax, [esi -16]
        adc eax, [esi -12]
        adc eax, [esi -8]
        adc eax, [esi -4]
l45:
        lea esi, [esi + 128]
        adc eax, 0
        dec ecx
        jge l40
        mov ecx, edx
l50:    and ecx, 3
        jz l80

        // Handle the last 1-3 bytes without jumping
        not ecx                         // 1->2, 2->1, 3->0, higher bits are masked
        mov ebx, HEX(ffffff)            // by the shll and shrl instructions
        shl ecx, 3
        shr ebx, cl
        and ebx, [esi -128]             // esi is 4-aligned so should be ok
        add eax, ebx
        adc eax, 0
l80:
        test dword ptr [esp + 12], 1    // Was the original buf odd?
        jz l90
        rol eax, 8
l90:
        pop ebx
        pop esi
        ret

#endif

END