1/*
2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
3 *		operating system.  INET is implemented using the  BSD Socket
4 *		interface as the means of communication with the user level.
5 *
6 *		IP/TCP/UDP checksumming routines
7 *
8 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
9 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
10 *		Tom May, <ftom@netcom.com>
11 *              Pentium Pro/II routines:
12 *              Alexander Kjeldaas <astor@guardian.no>
13 *              Finn Arne Gangstad <finnag@guardian.no>
14 *		Lots of code moved from tcp.c and ip.c; see those files
15 *		for more names.
16 *
17 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
18 *			     handling.
19 *		Andi Kleen,  add zeroing on error
20 *                   converted to pure assembler
21 *
22 *		This program is free software; you can redistribute it and/or
23 *		modify it under the terms of the GNU General Public License
24 *		as published by the Free Software Foundation; either version
25 *		2 of the License, or (at your option) any later version.
26 */
27
28/*
29 * computes a partial checksum, e.g. for TCP/UDP fragments
30 */
31
32/*
33unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
34 */
35
36#include <asm.inc>
37
38.code
39.align 4
40PUBLIC _csum_partial
41
42#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
43
44	  /*
45	   * Experiments with Ethernet and SLIP connections show that buff
46	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
47	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
48	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
49	   * alignment for the unrolled loop.
50	   */
/*
 * unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 *
 * Adds len bytes starting at buff to the 32-bit partial checksum sum using
 * end-around-carry addition and returns the new partial sum in eax.
 *
 * The three arguments are read from the stack (buff, len, sum at
 * [esp + 4], [esp + 8], [esp + 12] on entry) and the routine returns with a
 * plain ret, so the caller removes them.  esi and ebx are saved and
 * restored; ecx, edx and the flags are clobbered.
 *
 * Register roles:
 *   eax  running sum
 *   esi  read pointer
 *   ecx  bytes remaining / 32-byte block counter
 *   edx  copy of the byte count / dword counter
 *   ebx  scratch for loaded data
 *
 * When buff is odd, the sum is rotated left by 8 bits after the leading
 * byte has been added and by another 8 bits at l7, so that everything in
 * between can be summed with 2- and 4-byte aligned loads.
 */
_csum_partial:
	push esi
	push ebx
	mov eax, [esp + 20]	// Function arg: unsigned int sum
	mov ecx, [esp + 16]	// Function arg: int len
	mov esi, [esp + 12]	// Function arg: unsigned char *buff
	test esi, 3	    	// Check alignment.
	jz m2			// Jump if alignment is ok.
	test esi, 1		// Check alignment.
	jz l10			// Jump if alignment is boundary of 2bytes.

	// buf is odd
	dec ecx
	jl l8			// len < 1: nothing to add, return sum as is.
	movzx ebx, byte ptr [esi]
	add eax, ebx		// CF is 0 here anyway, so a plain add is enough,
	adc eax, 0		// but the carry out must be folded back in
				// before rol overwrites CF.
	rol eax, 8
	inc esi
	test esi, 2
	jz m2
l10:
	sub ecx, 2		// Alignment uses up two bytes.
	jae m1			// Jump if we had at least two bytes.
	add ecx, 2		// ecx was < 2.  Deal with it.
	jmp l4
m1:	mov bx, [esi]
	add esi, 2
	add ax, bx		// 16-bit add; the carry is folded in below.
	adc eax, 0
m2:
	// esi is 4-byte aligned from here on.
	mov edx, ecx		// Keep the byte count for the tail handling.
	shr ecx, 5		// ecx = number of 32-byte blocks.
	jz l2
	test esi, esi		// Clears CF before the adc chain.
	// Unrolled loop: 8 dwords per iteration.  lea and dec leave CF
	// untouched, so the carry runs through from one iteration to the next.
l1:	mov ebx, [esi]
	adc eax, ebx
	mov ebx, [esi + 4]
	adc eax, ebx
	mov ebx, [esi + 8]
	adc eax, ebx
	mov ebx, [esi + 12]
	adc eax, ebx
	mov ebx, [esi + 16]
	adc eax, ebx
	mov ebx, [esi + 20]
	adc eax, ebx
	mov ebx, [esi + 24]
	adc eax, ebx
	mov ebx, [esi + 28]
	adc eax, ebx
	lea esi, [esi + 32]
	dec ecx
	jne l1
	adc eax, 0		// Fold the final carry of the loop.
l2:	mov ecx, edx
	and edx, HEX(1c)	// Bytes left in whole dwords (0..28).
	je l4
	shr edx, 2		// This clears CF
l3:	adc eax, [esi]
	lea esi, [esi + 4]
	dec edx
	jne l3
	adc eax, 0
l4:	and ecx, 3		// 0..3 trailing bytes.
	jz l7
	cmp ecx, 2
	jb l5			// Exactly one byte left.
	mov cx, [esi]		// mov and lea keep the flags of the cmp above.
	lea esi, [esi + 2]
	je l6			// Exactly two bytes left.
	shl ecx, 16		// Three bytes: word goes to the upper half,
l5:	mov cl, [esi]		// last byte into the low byte.
l6:	add eax, ecx
	adc eax, 0
l7:
	test dword ptr [esp + 12], 1	// Was the original buff odd?
	jz l8
	rol eax, 8		// Second half of the rotation for odd buff.
l8:
	pop ebx
	pop esi
	ret
133
134#else
135
136/* Version for PentiumII/PPro */
137
/*
 * unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 *
 * Adds len bytes starting at buff to the 32-bit partial checksum sum using
 * end-around-carry addition and returns the new partial sum in eax.
 *
 * The three arguments are read from the stack (buff, len, sum at
 * [esp + 4], [esp + 8], [esp + 12] on entry) and the routine returns with a
 * plain ret, so the caller removes them.  esi and ebx are saved and
 * restored; ecx, edx and the flags are clobbered.
 *
 * Register roles:
 *   eax  running sum
 *   esi  read pointer (kept 128 bytes ahead inside the unrolled block)
 *   ecx  bytes remaining / 128-byte block counter
 *   edx  copy of the byte count for the tail handling
 *   ebx  scratch / computed jump target
 *
 * The entry label carries the leading underscore so that it matches the
 * PUBLIC _csum_partial declaration at the top of the file.
 */
_csum_partial:
	push esi
	push ebx
	mov eax, [esp + 20]	// Function arg: unsigned int sum
	mov ecx, [esp + 16]	// Function arg: int len
	mov esi, [esp + 12]	// Function arg: const unsigned char *buf

	test esi, 3
	jnz l25			// Not 4-byte aligned: fix the alignment first.
l10:
	// esi is 4-byte aligned here.
	mov edx, ecx		// Keep the byte count for the tail handling.
	mov ebx, ecx
	and ebx, HEX(7c)	// Bytes in whole dwords beyond the 128-byte blocks.
	shr ecx, 7		// ecx = number of full 128-byte blocks.
	add esi, ebx		// Point past the partial block ...
	shr ebx, 2		// ... ebx = number of dwords in it (0..31).
	neg ebx
	// Jump into the unrolled chain so that exactly that many adc
	// instructions run before l45.  This relies on every adc below being
	// assembled as 3 bytes (opcode, modrm, disp8) - verify with the
	// assembler in use.
	lea ebx, l45[ebx + ebx * 2]
	test esi, esi		// Clears CF before the adc chain.
	jmp ebx			// Register-indirect jump to the computed address.

	// Handle 2-byte-aligned regions
l20: add ax, [esi]
	lea esi, [esi + 2]
	adc eax, 0
	jmp l10
l25:
	test esi, 1
	jz l30
	// buf is odd
	dec ecx
	jl l90			// len < 1: nothing to add, return sum as is.
	movzx ebx, byte ptr [esi]
	add eax, ebx
	adc eax, 0
	rol eax, 8		// Undone by the second rol at l80.
	inc esi
	test esi, 2
	jz l10

l30: sub ecx, 2
	ja l20			// More than 2 bytes: sum one word, then go aligned.
	je l32			// Exactly 2 bytes.
	add ecx, 2
	jz l80			// No bytes left.
	movzx ebx, byte ptr [esi]	// csumming 1 byte, 2-aligned
	add eax, ebx
	adc eax, 0
	jmp l80
l32:
	add ax, [esi]	// csumming 2 bytes, 2-aligned
	adc eax, 0
	jmp l80

	// One full 128-byte block.  l40 itself is reached only through the
	// loop branch below; the computed jump enters further down the chain.
l40:
	add eax, [esi -128]
	adc eax, [esi -124]
	adc eax, [esi -120]
	adc eax, [esi -116]
	adc eax, [esi -112]
	adc eax, [esi -108]
	adc eax, [esi -104]
	adc eax, [esi -100]
	adc eax, [esi -96]
	adc eax, [esi -92]
	adc eax, [esi -88]
	adc eax, [esi -84]
	adc eax, [esi -80]
	adc eax, [esi -76]
	adc eax, [esi -72]
	adc eax, [esi -68]
	adc eax, [esi -64]
	adc eax, [esi -60]
	adc eax, [esi -56]
	adc eax, [esi -52]
	adc eax, [esi -48]
	adc eax, [esi -44]
	adc eax, [esi -40]
	adc eax, [esi -36]
	adc eax, [esi -32]
	adc eax, [esi -28]
	adc eax, [esi -24]
	adc eax, [esi -20]
	adc eax, [esi -16]
	adc eax, [esi -12]
	adc eax, [esi -8]
	adc eax, [esi -4]
l45:
	lea esi, [esi + 128]	// lea keeps CF for the adc below.
	adc eax, 0
	dec ecx
	jge l40			// Another full 128-byte block to go.
	mov ecx, edx
l50:	and ecx, 3
	jz l80

	// Handle the last 1-3 bytes without jumping
	not ecx		// 1->2, 2->1, 3->0, higher bits are masked
	mov ebx, HEX(ffffff)	// by the shl and shr instructions
	shl ecx, 3
	shr ebx, cl		// Mask covering the 1, 2 or 3 remaining bytes.
	and ebx, [esi -128]	// esi is 4-aligned so should be ok
	add eax, ebx
	adc eax, 0
l80:
	test dword ptr [esp + 12], 1	// Was the original buf odd?
	jz l90
	rol eax, 8
l90:
	pop ebx
	pop esi
	ret
250
251#endif
252
253END
254