/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 */

#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
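
/*
 * For reference, a rough C sketch of what csum_partial() computes: a 32-bit
 * running ones' complement sum over the buffer, added on top of the incoming
 * "sum".  This is illustration only and is not compiled; the generic C
 * version lives in lib/checksum.c.  The 32-bit partial sums it produces are
 * not bit-for-bit identical to the assembly below, but they fold to the same
 * 16-bit checksum.
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buff, int len,
 *					 unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {		// sum 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)			// odd trailing byte
 *			acc += *buff;
 *
 *		while (acc >> 32)		// fold carries back into 32 bits
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */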

.text
.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
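	  /*
	   * In C terms the conversion is roughly (illustration only, carries
	   * folded as in the sketch above):
	   *
	   *	if (((unsigned long)buff & 2) && len >= 2) {
	   *		acc += *(const unsigned short *)buff;
	   *		buff += 2;
	   *		len -= 2;
	   *	}
	   *	// buff is now 4-byte aligned for the unrolled dword loop
	   */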
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx	# csum the leading word so that the
	addl $2, %esi		# unrolled loop below sees a 4-aligned buff
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# edx = remaining length
	shrl $5, %ecx		# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clear the carry flag before the adcl loop
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx		# recover the remaining length
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax	# sum the remaining dwords
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0-3 trailing bytes left
	jz 7f
	cmpl $2, %ecx
	jb 5f			# only one byte left
	movw (%esi),%cx		# load the trailing word
	leal 2(%esi),%esi
	je 6f			# exactly two bytes left
	shll $16,%ecx		# three bytes: word goes in the high half
5:	movb (%esi),%cl		# trailing byte goes in the low half
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	popl %ebx
	popl %esi
	ret

#else

/* Version for PentiumII/PPro */
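
/*
 * Strategy: the bulk of the buffer is summed in unrolled 128-byte blocks
 * (the run of adcl instructions at label 40 below).  The sub-128-byte
 * remainder is summed first by computing a jump target inside that same
 * unrolled run: each "adcl disp8(%esi), %eax" assembles to 3 bytes, so
 * "45f - 3*ndwords" lands exactly ndwords instructions before label 45.
 * The final 1-3 bytes are then masked out of the last dword instead of
 * being loaded with separate byte/word accesses.
 */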

csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buff

	testl $2, %esi		# Check alignment.
	jnz 30f			# Fix up a 2-byte-aligned buffer first.
10:
	movl %ecx, %edx		# save len
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = bytes in the partial (< 128 byte) block
	shrl $7, %ecx		# ecx = number of full 128-byte blocks
	addl %ebx,%esi		# the unrolled run below uses negative offsets
	shrl $2, %ebx		# ebx = dwords in the partial block
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# 45f - 3*ndwords (each adcl is 3 bytes)
	testl %esi, %esi	# clear the carry flag
	jmp *%ebx		# jump into the unrolled run

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# point at the end of the next 128-byte block
	adcl $0, %eax		# fold the carry from the adcl run back in
	dec %ecx
	jge 40b			# more full 128-byte blocks to sum?
	movl %edx, %ecx		# restore the original len
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
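
	# The mask trick above in C terms (illustration only, little-endian):
	#	n    = len & 3;				/* 1, 2 or 3 trailing bytes */
	#	mask = 0xffffff >> (8 * (~n & 3));	/* 0xff, 0xffff or 0xffffff */
	#	sum += *(u32 *)(buff + (len & ~3)) & mask;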
80:
	popl %ebx
	popl %esi
	ret

#endif
EXPORT_SYMBOL(csum_partial)