1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #define	ARCFOUR_LOOP_OPTIMIZED
27 
28 #ifndef _KERNEL
29 #include <stdint.h>
30 #endif	/* _KERNEL */
31 
32 #include "arcfour.h"
33 
34 #if defined(__amd64)
35 /* ARCFour_key.flag values */
36 #define	ARCFOUR_ON_INTEL	1
37 #define	ARCFOUR_ON_AMD64	0
38 
39 #ifdef _KERNEL
40 #include <sys/x86_archext.h>
41 #include <sys/cpuvar.h>
42 
43 #else
44 #include <sys/auxv.h>
45 #endif	/* _KERNEL */
46 #endif	/* __amd64 */
47 
48 #ifndef __amd64
49 /*
50  * Initialize the key stream 'key' using the key value.
51  *
52  * Input:
53  * keyval	User-provided key
54  * keyvallen	Length, in bytes, of keyval
55  * Output:
56  * key		Initialized ARCFOUR key schedule, based on keyval
57  */
58 void
59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
60 {
61 /* EXPORT DELETE START */
62 
63 	uchar_t ext_keyval[256];
64 	uchar_t tmp;
65 	int i, j;
66 
67 	/* Normalize key length to 256 */
68 	for (i = j = 0; i < 256; i++, j++) {
69 		if (j == keyvallen)
70 			j = 0;
71 		ext_keyval[i] = keyval[j];
72 	}
73 
74 	for (i = 0; i < 256; i++)
75 		key->arr[i] = (uchar_t)i;
76 
77 	j = 0;
78 	for (i = 0; i < 256; i++) {
79 		j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
80 		tmp = key->arr[i];
81 		key->arr[i] = key->arr[j];
82 		key->arr[j] = tmp;
83 	}
84 	key->i = 0;
85 	key->j = 0;
86 
87 /* EXPORT DELETE END */
88 }
89 #endif	/* !__amd64 */
90 
91 
92 /*
93  * Encipher 'in' using 'key'.
94  *
95  * Input:
96  * key		ARCFOUR key, initialized by arcfour_key_init()
97  * in		Input text
98  * out		Buffer to contain output text
99  * len		Length, in bytes, of the in and out buffers
100  *
101  * Output:
102  * out		Buffer containing output text
103  *
104  * Note: in and out can point to the same location
105  */
106 void
107 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
108 {
109 /* EXPORT DELETE START */
110 #ifdef	__amd64
111 	if (key->flag == ARCFOUR_ON_AMD64) {
112 		arcfour_crypt_asm(key, in, out, len);
113 	} else { /* Intel EM64T */
114 #endif	/* amd64 */
115 
116 	size_t		ii;
117 	uchar_t		i, j, ti, tj;
118 #ifdef ARCFOUR_LOOP_OPTIMIZED
119 	uchar_t		arr_ij;
120 #endif
121 #ifdef __amd64
122 	uint32_t	*arr;
123 #else
124 	uchar_t		*arr;
125 #endif
126 
127 #ifdef	sun4u
128 	/*
129 	 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
130 	 * the cases where the input and output buffers are aligned on
131 	 * a multiple of 8-byte boundary.
132 	 */
133 	int		index;
134 	uchar_t		tmp;
135 
136 	index = (((uint64_t)(uintptr_t)in) & 0x7);
137 
138 	/* Get the 'in' on an 8-byte alignment */
139 	if (index > 0) {
140 		i = key->i;
141 		j = key->j;
142 		for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
143 		    (index-- > 0) && len > 0;
144 		    len--, in++, out++) {
145 			++i;
146 			j = j + key->arr[i];
147 			tmp = key->arr[i];
148 			key->arr[i] = key->arr[j];
149 			key->arr[j] = tmp;
150 			tmp = key->arr[i] + key->arr[j];
151 			*out = *in ^ key->arr[tmp];
152 		}
153 		key->i = i;
154 		key->j = j;
155 	}
156 
157 	if (len == 0)
158 		return;
159 
160 	/* See if we're fortunate and 'out' got aligned as well */
161 
162 	if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
163 #endif	/* sun4u */
164 
165 	i = key->i;
166 	j = key->j;
167 	arr = key->arr;
168 
169 #ifndef ARCFOUR_LOOP_OPTIMIZED
170 	/*
171 	 * This loop is hasn't been reordered, but is kept for reference
172 	 * purposes as it's more readable
173 	 */
174 	for (ii = 0; ii < len; ++ii) {
175 		++i;
176 		ti = arr[i];
177 		j = j + ti;
178 		tj = arr[j];
179 		arr[j] = ti;
180 		arr[i] = tj;
181 		out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
182 	}
183 
184 #else
185 	/*
186 	 * This for loop is optimized by carefully spreading out
187 	 * memory access and storage to avoid conflicts,
188 	 * allowing the processor to process operations in parallel
189 	 */
190 
191 	/* for loop setup */
192 	++i;
193 	ti = arr[i];
194 	j = j + ti;
195 	tj = arr[j];
196 	arr[j] = ti;
197 	arr[i] = tj;
198 	arr_ij = arr[(ti + tj) & 0xff];
199 	--len;
200 
201 	for (ii = 0; ii < len; ) {
202 		++i;
203 		ti = arr[i];
204 		j = j + ti;
205 		tj = arr[j];
206 		arr[j] = ti;
207 		arr[i] = tj;
208 
209 		/* save result from previous loop: */
210 		out[ii] = in[ii] ^ arr_ij;
211 
212 		++ii;
213 		arr_ij = arr[(ti + tj) & 0xff];
214 	}
215 	/* save result from last loop: */
216 	out[ii] = in[ii] ^ arr_ij;
217 #endif
218 
219 	key->i = i;
220 	key->j = j;
221 
222 #ifdef	sun4u
223 	} else {
224 		arcfour_crypt_aligned(key, len, in, out);
225 	}
226 #endif	/* sun4u */
227 #ifdef	__amd64
228 	}
229 #endif	/* amd64 */
230 
231 /* EXPORT DELETE END */
232 }
233 
234 
235 #ifdef	__amd64
236 /*
237  * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
238  * Cache the result, as the CPU can't change.
239  *
240  * Note: the userland version uses getisax() and checks for an AMD-64-only
241  * feature.  The kernel version uses cpuid_getvendor().
242  */
243 int
244 arcfour_crypt_on_intel(void)
245 {
246 	static int	cached_result = -1;
247 
248 	if (cached_result == -1) { /* first time */
249 #ifdef _KERNEL
250 		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
251 #else
252 		uint_t	ui;
253 
254 		(void) getisax(&ui, 1);
255 		cached_result = ((ui & AV_386_AMD_MMX) == 0);
256 #endif	/* _KERNEL */
257 	}
258 
259 	return (cached_result);
260 }
261 #endif	/* __amd64 */
262