/*
 * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

/* Licence tag for this file.  FILE_LICENCE and its argument are not
 * defined in this file; they are assumed to come from the project's
 * headers.
 */
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

/** @file
 *
 * TCP/IP checksum
 *
 */

#include <stdint.h>
#include <limits.h>
#include <ipxe/tcpip.h>

/* Address of the label that the inline assembly in
 * tcpip_continue_chksum() places immediately after the unrolled loop
 * body.  It is declared here so that the C code can hand the label's
 * address to the assembly as the base for the computed jump.
 */
extern char x86_tcpip_loop_end[];

/**
 * Calculate continued TCP/IP checksum
 *
 * @v partial		Checksum of already-summed data, in network byte order
 * @v data		Data buffer
 * @v len		Length of data buffer
 * @ret cksum		Updated checksum, in network byte order
 *
 * The buffer is summed in four phases, all chained through the CPU
 * carry flag:
 *
 *  1. up to three (64-bit) or one (32-bit) leading 16-bit words, to
 *     bring the data pointer up to native word alignment;
 *  2. native machine words, using a "lods;adc" loop unrolled x16;
 *  3. any remaining whole 16-bit words;
 *  4. a single trailing byte, if the length is odd.
 *
 * The native-width accumulator is then folded down to 16 bits.
 *
 * The loop labels used by the assembly are ordinary named assembler
 * symbols, so this asm statement must be emitted exactly once per
 * object file.  NOTE(review): confirm that the build never inlines or
 * clones this function.
 */
uint16_t tcpip_continue_chksum ( uint16_t partial, const void *data,
				 size_t len ) {
	/* Running sum is the complement of the stored checksum */
	unsigned long sum = ( ( ~partial ) & 0xffff );
	unsigned long initial_word_count;
	unsigned long loop_count;
	unsigned long loop_partial_count;
	unsigned long final_word_count;
	unsigned long final_byte;
	/* Dummy outputs: these exist only to tell the compiler which
	 * registers the assembly overwrites.
	 */
	unsigned long discard_S;
	unsigned long discard_c;
	unsigned long discard_a;
	unsigned long discard_r1;
	unsigned long discard_r2;

	/* Calculate number of initial 16-bit words required to bring
	 * the main loop into alignment.  (We don't care about the
	 * speed for data aligned to less than 16 bits, since this
	 * situation won't occur in practice.)
	 *
	 * The alignment prefix is at most ( sizeof ( sum ) - 2 )
	 * bytes, so it is skipped for buffers shorter than one
	 * machine word; this also guarantees that the subtraction
	 * from len below cannot underflow.
	 */
	if ( len >= sizeof ( sum ) ) {
		initial_word_count = ( ( -( ( intptr_t ) data ) &
					 ( sizeof ( sum ) - 1 ) ) >> 1 );
	} else {
		initial_word_count = 0;
	}
	len -= ( initial_word_count * 2 );

	/* Calculate number of iterations of the main loop.  This loop
	 * processes native machine words (32-bit or 64-bit), and is
	 * unrolled 16 times.  We calculate an overall iteration
	 * count, and a starting point for the first iteration.
	 */
	loop_count = ( len / ( sizeof ( sum ) * 16 ) );
	loop_partial_count =
		( ( len % ( sizeof ( sum ) * 16 ) ) / sizeof ( sum ) );

	/* Calculate number of 16-bit words remaining after the main
	 * loop completes.
	 */
	final_word_count = ( ( len % sizeof ( sum ) ) / 2 );

	/* Calculate whether or not a final byte remains at the end */
	final_byte = ( len & 1 );

	/* Calculate the checksum
	 *
	 * Operand map:
	 *   %0  running sum (a/b/c/d-class register, so that the
	 *       %b0 and %h0 byte halves can be addressed)
	 *   %1  SI: data pointer, advanced by each "lods"
	 *   %2  AX: destination of each "lods", initially zero
	 *   %3  CX: counter for each "loop"
	 *   %4  loop_partial_count, converted in place into the
	 *       address at which to enter the unrolled loop
	 *   %5  address of x86_tcpip_loop_end
	 *   %12 loop_count + 1
	 *   %13 final_word_count + 1
	 *   %14 final_byte
	 *
	 * Each counted phase jumps straight to its "loop"
	 * instruction, which decrements CX before testing it; this
	 * is why every count is passed in as ( count + 1 ).
	 *
	 * Only "imul" and "add" at the very start modify the flags
	 * for reasons other than checksumming; after the "clc", the
	 * carry flag is threaded through every "adc" until it is
	 * finally consumed at the end.
	 *
	 * NOTE(review): the push/pop fold sequence uses the stack
	 * without the compiler's knowledge; presumably the build
	 * disables the x86-64 red zone - confirm.
	 */
	__asm__ ( /* Calculate position at which to jump into the
		   * unrolled loop: step back one "lods;adc" pair
		   * from the end of the loop for each partial word.
		   */
		  "imul $( -x86_tcpip_loop_step_size ), %4\n\t"
		  "add %5, %4\n\t"

		  /* Clear carry flag before starting checksumming */
		  "clc\n\t"

		  /* Checksum initial words */
		  "jmp 2f\n\t"
		  "\n1:\n\t"
		  "lodsw\n\t"
		  "adcw %w2, %w0\n\t"
		  "\n2:\n\t"
		  "loop 1b\n\t"

		  /* Main "lods;adc" loop, unrolled x16 */
		  "mov %12, %3\n\t"
		  "jmp *%4\n\t"
		  "\nx86_tcpip_loop_start:\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "\nx86_tcpip_loop_end:\n\t"
		  "loop x86_tcpip_loop_start\n\t"
		  /* Size in bytes of one "lods;adc" pair, computed by
		   * the assembler from the unrolled loop itself.
		   */
		  ".equ x86_tcpip_loop_step_size, "
		  "  ( ( x86_tcpip_loop_end - x86_tcpip_loop_start ) >> 4 )\n\t"

		  /* Checksum remaining whole words */
		  "mov %13, %3\n\t"
		  "jmp 2f\n\t"
		  "\n1:\n\t"
		  "lodsw\n\t"
		  "adcw %w2, %w0\n\t"
		  "\n2:\n\t"
		  "loop 1b\n\t"

		  /* Checksum final byte if applicable.  With
		   * final_byte == 1 the "loop" decrements CX to zero
		   * and falls through; with final_byte == 0 CX wraps
		   * to a non-zero value and the byte is skipped.
		   */
		  "mov %14, %3\n\t"
		  "loop 1f\n\t"
		  "adcb (%1), %b0\n\t"
		  "adcb $0, %h0\n\t"
		  "\n1:\n\t"

		  /* Fold down to a uint16_t */
		  "push %0\n\t"
		  "popw %w0\n\t"
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
#if ULONG_MAX > 0xffffffffUL /* 64-bit only */
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
#endif /* 64-bit only */

		  /* Consume CF.  The first "adc" may itself carry out
		   * of the 16-bit sum; the second absorbs that.
		   */
		  "adcw $0, %w0\n\t"
		  "adcw $0, %w0\n\t"

		  : "=&Q" ( sum ), "=&S" ( discard_S ), "=&a" ( discard_a ),
		    "=&c" ( discard_c ), "=&r" ( discard_r1 ),
		    "=&r" ( discard_r2 )
		  : "0" ( sum ), "1" ( data ), "2" ( 0 ),
		    "3" ( initial_word_count + 1 ), "4" ( loop_partial_count ),
		    "5" ( x86_tcpip_loop_end ), "g" ( loop_count + 1 ),
		    "g" ( final_word_count + 1 ), "g" ( final_byte )
		  /* The buffer is read via %1, which conveys only the
		   * pointer value; declare the memory dependency so
		   * that the compiler treats the buffer contents as
		   * an input to this statement.
		   */
		  : "memory" );

	/* Only the low 16 bits of the folded sum are meaningful */
	return ( ~sum & 0xffff );
}