1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "mpi.h"
6 #include "prtypes.h"
7
8 /*
9 * This file implements a single function: s_mpi_getProcessorLineSize();
10 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
11 * if a cache exists, or zero if there is no cache. If more than one
12 * cache line exists, it should return the smallest line size (which is
13 * usually the L1 cache).
14 *
15 * mp_modexp uses this information to make sure that private key information
16 * isn't being leaked through the cache.
17 *
18 * Currently the file returns good data for most modern x86 processors, and
19 * reasonable data on 64-bit ppc processors. All other processors are assumed
20 * to have a cache line size of 32 bytes unless modified by target.mk.
21 *
22 */
23
24 #if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
25 /* X86 processors have special instructions that tell us about the cache */
26 #include "string.h"
27
28 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
29 #define AMD_64 1
30 #endif
31
32 /* Generic CPUID function */
33 #if defined(AMD_64)
34
35 #if defined(__GNUC__)
36
/*
 * Execute the CPUID instruction for leaf 'op' (x86-64, GCC-style inline
 * assembly) and store the resulting EAX, EBX, ECX and EDX register values
 * through the four output pointers.
 *
 * ECX is forced to zero before the instruction runs, as the 32-bit variants
 * of this function in this file already do, so the sub-leaf selector never
 * contains whatever the register happened to hold.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    __asm__("cpuid\n\t"
            : "=a"(*eax),
              "=b"(*ebx),
              "=c"(*ecx),
              "=d"(*edx)
            : "0"(op),   /* leaf number, passed in the same register as output 0 */
              "2"(0UL)); /* sub-leaf selector: load zero into the ECX operand */
}
49
50 #elif defined(_MSC_VER)
51
52 #include <intrin.h>
53
/*
 * CPUID wrapper for 64-bit MSVC builds: runs the compiler's __cpuid
 * intrinsic for leaf 'op' and hands the four result words back through
 * eax, ebx, ecx and edx.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    /* slots 0..3 are copied out below as EAX, EBX, ECX, EDX respectively */
    int regs[4];

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
67
68 #endif
69
70 #else /* !defined(AMD_64) */
71
72 /* x86 */
73
74 #if defined(__GNUC__)
/*
 * CPUID wrapper for 32-bit x86 builds using GCC-style inline assembly.
 * Executes CPUID for leaf 'op' and returns the four result registers
 * through eax, ebx, ecx and edx.
 */
void
freebl_cpuid(unsigned long op, unsigned long *eax,
             unsigned long *ebx, unsigned long *ecx,
             unsigned long *edx)
{
    unsigned long outA, outB, outC, outD;

    /* Two precautions are taken around the instruction:
     *  - ECX is zeroed first, because some older processors do not fill it
     *    and stale bits could be misread as CPU features that are absent.
     *  - EBX (the PIC register) is parked in EDI while CPUID runs, since
     *    GCC does not save it on its own here. The exchange afterwards
     *    restores EBX and leaves CPUID's EBX result in EDI, which is why
     *    the second output uses the "D" constraint. */
    __asm__("xor %%ecx, %%ecx\n\t"
            "mov %%ebx,%%edi\n\t"
            "cpuid\n\t"
            "xchgl %%ebx,%%edi\n\t"
            : "=a"(outA),
              "=D"(outB),
              "=c"(outC),
              "=d"(outD)
            : "0"(op));

    *eax = outA;
    *ebx = outB;
    *ecx = outC;
    *edx = outD;
}
96
/*
 * Try to toggle the EFLAGS bit(s) selected by 'flag' and report which of
 * them actually changed. The return value is the XOR of the flags word read
 * before and after the attempt, so it is zero when nothing could be flipped.
 * The original flags are written back before returning. Used to determine
 * the CPU type.
 */
static unsigned long
changeFlag(unsigned long flag)
{
    unsigned long toggled, saved;
    __asm__("pushfl\n\t"     /* read the current flags ... */
            "popl %0\n\t"    /* ... into operand 0 */
            "movl %0,%1\n\t" /* keep an untouched copy in operand 1 */
            "xorl %2,%0\n\t" /* invert the requested bit(s) */
            "pushl %0\n\t"   /* write the modified word ... */
            "popfl\n\t"      /* ... into the flags register */
            "pushfl\n\t"     /* read the flags back to see what stuck */
            "popl %0\n\t"
            "pushl %1\n\t"   /* put the saved flags back */
            "popfl\n\t"
            : "=r"(toggled),
              "=r"(saved),
              "=r"(flag)
            : "2"(flag));
    return toggled ^ saved;
}
120
121 #elif defined(_MSC_VER)
122
123 /*
124 * windows versions of the above assembler
125 */
126 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
127 void
freebl_cpuid(unsigned long op,unsigned long * Reax,unsigned long * Rebx,unsigned long * Recx,unsigned long * Redx)128 freebl_cpuid(unsigned long op, unsigned long *Reax,
129 unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
130 {
131 unsigned long Leax, Lebx, Lecx, Ledx;
132 __asm {
133 pushad
134 xor ecx,ecx
135 mov eax,op
136 wcpuid
137 mov Leax,eax
138 mov Lebx,ebx
139 mov Lecx,ecx
140 mov Ledx,edx
141 popad
142 }
143 *Reax = Leax;
144 *Rebx = Lebx;
145 *Recx = Lecx;
146 *Redx = Ledx;
147 }
148
149 static unsigned long
changeFlag(unsigned long flag)150 changeFlag(unsigned long flag)
151 {
152 unsigned long changedFlags, originalFlags;
153 __asm {
154 push eax
155 push ebx
156 pushfd /* get the flags */
157 pop eax
158 push eax /* save the flags on the stack */
159 mov originalFlags,eax /* save the original flags */
160 mov ebx,flag
161 xor eax,ebx /* flip the bit */
162 push eax /* set the flags */
163 popfd
164 pushfd /* get the flags again (for return) */
165 pop eax
166 popfd /* restore the original flags */
167 mov changedFlags,eax
168 pop ebx
169 pop eax
170 }
171 return changedFlags ^ originalFlags;
172 }
173 #endif
174
175 #endif
176
177 #if !defined(AMD_64)
/* EFLAGS bit masks probed by is386()/is486(): bit 18 and bit 21. */
#define AC_FLAG 0x40000
#define ID_FLAG 0x200000

/*
 * Returns non-zero when the AC_FLAG bit cannot be flipped. According to
 * Intel Application Note AP-485, 386 processors cannot flip this bit.
 */
static int
is386(void)
{
    return changeFlag(AC_FLAG) == 0;
}
187
188 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
189 static int
is486()190 is486()
191 {
192 return changeFlag(ID_FLAG) == 0;
193 }
194 #endif
195
/*
 * table for Intel Cache.
 * See Intel Application Note AP-485 for more information
 *
 * CacheMap is indexed by the one-byte cache descriptor returned by CPUID
 * command 2. Each entry gives the kind of structure the descriptor stands
 * for and, for caches, its line size in bytes. A line size of 0 marks
 * entries the lookup code ignores (TLBs, unknown descriptors, and the
 * special 0x40 "no L2 or L3 cache" descriptor).
 */

/* Storage type for a CacheType value inside the table (one byte per entry). */
typedef unsigned char CacheTypeEntry;

/* Kinds of structure a descriptor can describe ('i' = instruction,
 * 'd' = data, no suffix = not split by the table). */
typedef enum {
    Cache_NONE = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB = 2,
    Cache_TLBi = 3,
    Cache_TLBd = 4,
    Cache_Trace = 5,
    Cache_L1 = 6,
    Cache_L1i = 7,
    Cache_L1d = 8,
    Cache_L2 = 9,
    Cache_L2i = 10,
    Cache_L2d = 11,
    Cache_L3 = 12,
    Cache_L3i = 13,
    Cache_L3d = 14
} CacheType;

/* One table entry: what the descriptor describes and its line size. */
struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;
};
static const struct _cache CacheMap[256] = {
    /* 00 */ { Cache_NONE, 0 },
    /* 01 */ { Cache_TLBi, 0 },
    /* 02 */ { Cache_TLBi, 0 },
    /* 03 */ { Cache_TLBd, 0 },
    /* 04 */ { Cache_TLBd, 0 },
    /* 05 */ { Cache_UNKNOWN, 0 },
    /* 06 */ { Cache_L1i, 32 },
    /* 07 */ { Cache_UNKNOWN, 0 },
    /* 08 */ { Cache_L1i, 32 },
    /* 09 */ { Cache_UNKNOWN, 0 },
    /* 0a */ { Cache_L1d, 32 },
    /* 0b */ { Cache_UNKNOWN, 0 },
    /* 0c */ { Cache_L1d, 32 },
    /* 0d */ { Cache_L1d, 64 }, /* L1 data, 16 KB, 4-way (Intel SDM) */
    /* 0e */ { Cache_L1d, 64 }, /* L1 data, 24 KB, 6-way (Intel SDM) */
    /* 0f */ { Cache_UNKNOWN, 0 },
    /* 10 */ { Cache_UNKNOWN, 0 },
    /* 11 */ { Cache_UNKNOWN, 0 },
    /* 12 */ { Cache_UNKNOWN, 0 },
    /* 13 */ { Cache_UNKNOWN, 0 },
    /* 14 */ { Cache_UNKNOWN, 0 },
    /* 15 */ { Cache_UNKNOWN, 0 },
    /* 16 */ { Cache_UNKNOWN, 0 },
    /* 17 */ { Cache_UNKNOWN, 0 },
    /* 18 */ { Cache_UNKNOWN, 0 },
    /* 19 */ { Cache_UNKNOWN, 0 },
    /* 1a */ { Cache_UNKNOWN, 0 },
    /* 1b */ { Cache_UNKNOWN, 0 },
    /* 1c */ { Cache_UNKNOWN, 0 },
    /* 1d */ { Cache_UNKNOWN, 0 },
    /* 1e */ { Cache_UNKNOWN, 0 },
    /* 1f */ { Cache_UNKNOWN, 0 },
    /* 20 */ { Cache_UNKNOWN, 0 },
    /* 21 */ { Cache_UNKNOWN, 0 },
    /* 22 */ { Cache_L3, 64 },
    /* 23 */ { Cache_L3, 64 },
    /* 24 */ { Cache_UNKNOWN, 0 },
    /* 25 */ { Cache_L3, 64 },
    /* 26 */ { Cache_UNKNOWN, 0 },
    /* 27 */ { Cache_UNKNOWN, 0 },
    /* 28 */ { Cache_UNKNOWN, 0 },
    /* 29 */ { Cache_L3, 64 },
    /* 2a */ { Cache_UNKNOWN, 0 },
    /* 2b */ { Cache_UNKNOWN, 0 },
    /* 2c */ { Cache_L1d, 64 },
    /* 2d */ { Cache_UNKNOWN, 0 },
    /* 2e */ { Cache_UNKNOWN, 0 },
    /* 2f */ { Cache_UNKNOWN, 0 },
    /* 30 */ { Cache_L1i, 64 },
    /* 31 */ { Cache_UNKNOWN, 0 },
    /* 32 */ { Cache_UNKNOWN, 0 },
    /* 33 */ { Cache_UNKNOWN, 0 },
    /* 34 */ { Cache_UNKNOWN, 0 },
    /* 35 */ { Cache_UNKNOWN, 0 },
    /* 36 */ { Cache_UNKNOWN, 0 },
    /* 37 */ { Cache_UNKNOWN, 0 },
    /* 38 */ { Cache_UNKNOWN, 0 },
    /* 39 */ { Cache_L2, 64 },
    /* 3a */ { Cache_UNKNOWN, 0 },
    /* 3b */ { Cache_L2, 64 },
    /* 3c */ { Cache_L2, 64 },
    /* 3d */ { Cache_UNKNOWN, 0 },
    /* 3e */ { Cache_UNKNOWN, 0 },
    /* 3f */ { Cache_UNKNOWN, 0 },
    /* 40 */ { Cache_L2, 0 },
    /* 41 */ { Cache_L2, 32 },
    /* 42 */ { Cache_L2, 32 },
    /* 43 */ { Cache_L2, 32 },
    /* 44 */ { Cache_L2, 32 },
    /* 45 */ { Cache_L2, 32 },
    /* 46 */ { Cache_UNKNOWN, 0 },
    /* 47 */ { Cache_UNKNOWN, 0 },
    /* 48 */ { Cache_UNKNOWN, 0 },
    /* 49 */ { Cache_UNKNOWN, 0 },
    /* 4a */ { Cache_UNKNOWN, 0 },
    /* 4b */ { Cache_UNKNOWN, 0 },
    /* 4c */ { Cache_UNKNOWN, 0 },
    /* 4d */ { Cache_UNKNOWN, 0 },
    /* 4e */ { Cache_UNKNOWN, 0 },
    /* 4f */ { Cache_UNKNOWN, 0 },
    /* 50 */ { Cache_TLBi, 0 },
    /* 51 */ { Cache_TLBi, 0 },
    /* 52 */ { Cache_TLBi, 0 },
    /* 53 */ { Cache_UNKNOWN, 0 },
    /* 54 */ { Cache_UNKNOWN, 0 },
    /* 55 */ { Cache_UNKNOWN, 0 },
    /* 56 */ { Cache_UNKNOWN, 0 },
    /* 57 */ { Cache_UNKNOWN, 0 },
    /* 58 */ { Cache_UNKNOWN, 0 },
    /* 59 */ { Cache_UNKNOWN, 0 },
    /* 5a */ { Cache_UNKNOWN, 0 },
    /* 5b */ { Cache_TLBd, 0 },
    /* 5c */ { Cache_TLBd, 0 },
    /* 5d */ { Cache_TLBd, 0 },
    /* 5e */ { Cache_UNKNOWN, 0 },
    /* 5f */ { Cache_UNKNOWN, 0 },
    /* 60 */ { Cache_L1d, 64 }, /* L1 data, 16 KB, 8-way (Intel SDM) */
    /* 61 */ { Cache_UNKNOWN, 0 },
    /* 62 */ { Cache_UNKNOWN, 0 },
    /* 63 */ { Cache_UNKNOWN, 0 },
    /* 64 */ { Cache_UNKNOWN, 0 },
    /* 65 */ { Cache_UNKNOWN, 0 },
    /* 66 */ { Cache_L1d, 64 },
    /* 67 */ { Cache_L1d, 64 },
    /* 68 */ { Cache_L1d, 64 },
    /* 69 */ { Cache_UNKNOWN, 0 },
    /* 6a */ { Cache_UNKNOWN, 0 },
    /* 6b */ { Cache_UNKNOWN, 0 },
    /* 6c */ { Cache_UNKNOWN, 0 },
    /* 6d */ { Cache_UNKNOWN, 0 },
    /* 6e */ { Cache_UNKNOWN, 0 },
    /* 6f */ { Cache_UNKNOWN, 0 },
    /* 70 */ { Cache_Trace, 1 },
    /* 71 */ { Cache_Trace, 1 },
    /* 72 */ { Cache_Trace, 1 },
    /* 73 */ { Cache_UNKNOWN, 0 },
    /* 74 */ { Cache_UNKNOWN, 0 },
    /* 75 */ { Cache_UNKNOWN, 0 },
    /* 76 */ { Cache_UNKNOWN, 0 },
    /* 77 */ { Cache_UNKNOWN, 0 },
    /* 78 */ { Cache_UNKNOWN, 0 },
    /* 79 */ { Cache_L2, 64 },
    /* 7a */ { Cache_L2, 64 },
    /* 7b */ { Cache_L2, 64 },
    /* 7c */ { Cache_L2, 64 },
    /* 7d */ { Cache_UNKNOWN, 0 },
    /* 7e */ { Cache_UNKNOWN, 0 },
    /* 7f */ { Cache_UNKNOWN, 0 },
    /* 80 */ { Cache_UNKNOWN, 0 },
    /* 81 */ { Cache_UNKNOWN, 0 },
    /* 82 */ { Cache_L2, 32 },
    /* 83 */ { Cache_L2, 32 },
    /* 84 */ { Cache_L2, 32 },
    /* 85 */ { Cache_L2, 32 },
    /* 86 */ { Cache_L2, 64 },
    /* 87 */ { Cache_L2, 64 },
    /* 88 */ { Cache_UNKNOWN, 0 },
    /* 89 */ { Cache_UNKNOWN, 0 },
    /* 8a */ { Cache_UNKNOWN, 0 },
    /* 8b */ { Cache_UNKNOWN, 0 },
    /* 8c */ { Cache_UNKNOWN, 0 },
    /* 8d */ { Cache_UNKNOWN, 0 },
    /* 8e */ { Cache_UNKNOWN, 0 },
    /* 8f */ { Cache_UNKNOWN, 0 },
    /* 90 */ { Cache_UNKNOWN, 0 },
    /* 91 */ { Cache_UNKNOWN, 0 },
    /* 92 */ { Cache_UNKNOWN, 0 },
    /* 93 */ { Cache_UNKNOWN, 0 },
    /* 94 */ { Cache_UNKNOWN, 0 },
    /* 95 */ { Cache_UNKNOWN, 0 },
    /* 96 */ { Cache_UNKNOWN, 0 },
    /* 97 */ { Cache_UNKNOWN, 0 },
    /* 98 */ { Cache_UNKNOWN, 0 },
    /* 99 */ { Cache_UNKNOWN, 0 },
    /* 9a */ { Cache_UNKNOWN, 0 },
    /* 9b */ { Cache_UNKNOWN, 0 },
    /* 9c */ { Cache_UNKNOWN, 0 },
    /* 9d */ { Cache_UNKNOWN, 0 },
    /* 9e */ { Cache_UNKNOWN, 0 },
    /* 9f */ { Cache_UNKNOWN, 0 },
    /* a0 */ { Cache_UNKNOWN, 0 },
    /* a1 */ { Cache_UNKNOWN, 0 },
    /* a2 */ { Cache_UNKNOWN, 0 },
    /* a3 */ { Cache_UNKNOWN, 0 },
    /* a4 */ { Cache_UNKNOWN, 0 },
    /* a5 */ { Cache_UNKNOWN, 0 },
    /* a6 */ { Cache_UNKNOWN, 0 },
    /* a7 */ { Cache_UNKNOWN, 0 },
    /* a8 */ { Cache_UNKNOWN, 0 },
    /* a9 */ { Cache_UNKNOWN, 0 },
    /* aa */ { Cache_UNKNOWN, 0 },
    /* ab */ { Cache_UNKNOWN, 0 },
    /* ac */ { Cache_UNKNOWN, 0 },
    /* ad */ { Cache_UNKNOWN, 0 },
    /* ae */ { Cache_UNKNOWN, 0 },
    /* af */ { Cache_UNKNOWN, 0 },
    /* b0 */ { Cache_TLBi, 0 },
    /* b1 */ { Cache_UNKNOWN, 0 },
    /* b2 */ { Cache_UNKNOWN, 0 },
    /* b3 */ { Cache_TLBd, 0 },
    /* b4 */ { Cache_UNKNOWN, 0 },
    /* b5 */ { Cache_UNKNOWN, 0 },
    /* b6 */ { Cache_UNKNOWN, 0 },
    /* b7 */ { Cache_UNKNOWN, 0 },
    /* b8 */ { Cache_UNKNOWN, 0 },
    /* b9 */ { Cache_UNKNOWN, 0 },
    /* ba */ { Cache_UNKNOWN, 0 },
    /* bb */ { Cache_UNKNOWN, 0 },
    /* bc */ { Cache_UNKNOWN, 0 },
    /* bd */ { Cache_UNKNOWN, 0 },
    /* be */ { Cache_UNKNOWN, 0 },
    /* bf */ { Cache_UNKNOWN, 0 },
    /* c0 */ { Cache_UNKNOWN, 0 },
    /* c1 */ { Cache_UNKNOWN, 0 },
    /* c2 */ { Cache_UNKNOWN, 0 },
    /* c3 */ { Cache_UNKNOWN, 0 },
    /* c4 */ { Cache_UNKNOWN, 0 },
    /* c5 */ { Cache_UNKNOWN, 0 },
    /* c6 */ { Cache_UNKNOWN, 0 },
    /* c7 */ { Cache_UNKNOWN, 0 },
    /* c8 */ { Cache_UNKNOWN, 0 },
    /* c9 */ { Cache_UNKNOWN, 0 },
    /* ca */ { Cache_UNKNOWN, 0 },
    /* cb */ { Cache_UNKNOWN, 0 },
    /* cc */ { Cache_UNKNOWN, 0 },
    /* cd */ { Cache_UNKNOWN, 0 },
    /* ce */ { Cache_UNKNOWN, 0 },
    /* cf */ { Cache_UNKNOWN, 0 },
    /* d0 */ { Cache_UNKNOWN, 0 },
    /* d1 */ { Cache_UNKNOWN, 0 },
    /* d2 */ { Cache_UNKNOWN, 0 },
    /* d3 */ { Cache_UNKNOWN, 0 },
    /* d4 */ { Cache_UNKNOWN, 0 },
    /* d5 */ { Cache_UNKNOWN, 0 },
    /* d6 */ { Cache_UNKNOWN, 0 },
    /* d7 */ { Cache_UNKNOWN, 0 },
    /* d8 */ { Cache_UNKNOWN, 0 },
    /* d9 */ { Cache_UNKNOWN, 0 },
    /* da */ { Cache_UNKNOWN, 0 },
    /* db */ { Cache_UNKNOWN, 0 },
    /* dc */ { Cache_UNKNOWN, 0 },
    /* dd */ { Cache_UNKNOWN, 0 },
    /* de */ { Cache_UNKNOWN, 0 },
    /* df */ { Cache_UNKNOWN, 0 },
    /* e0 */ { Cache_UNKNOWN, 0 },
    /* e1 */ { Cache_UNKNOWN, 0 },
    /* e2 */ { Cache_UNKNOWN, 0 },
    /* e3 */ { Cache_UNKNOWN, 0 },
    /* e4 */ { Cache_UNKNOWN, 0 },
    /* e5 */ { Cache_UNKNOWN, 0 },
    /* e6 */ { Cache_UNKNOWN, 0 },
    /* e7 */ { Cache_UNKNOWN, 0 },
    /* e8 */ { Cache_UNKNOWN, 0 },
    /* e9 */ { Cache_UNKNOWN, 0 },
    /* ea */ { Cache_UNKNOWN, 0 },
    /* eb */ { Cache_UNKNOWN, 0 },
    /* ec */ { Cache_UNKNOWN, 0 },
    /* ed */ { Cache_UNKNOWN, 0 },
    /* ee */ { Cache_UNKNOWN, 0 },
    /* ef */ { Cache_UNKNOWN, 0 },
    /* f0 */ { Cache_UNKNOWN, 0 },
    /* f1 */ { Cache_UNKNOWN, 0 },
    /* f2 */ { Cache_UNKNOWN, 0 },
    /* f3 */ { Cache_UNKNOWN, 0 },
    /* f4 */ { Cache_UNKNOWN, 0 },
    /* f5 */ { Cache_UNKNOWN, 0 },
    /* f6 */ { Cache_UNKNOWN, 0 },
    /* f7 */ { Cache_UNKNOWN, 0 },
    /* f8 */ { Cache_UNKNOWN, 0 },
    /* f9 */ { Cache_UNKNOWN, 0 },
    /* fa */ { Cache_UNKNOWN, 0 },
    /* fb */ { Cache_UNKNOWN, 0 },
    /* fc */ { Cache_UNKNOWN, 0 },
    /* fd */ { Cache_UNKNOWN, 0 },
    /* fe */ { Cache_UNKNOWN, 0 },
    /* ff */ { Cache_UNKNOWN, 0 }
};
485
486 /*
487 * use the above table to determine the CacheEntryLineSize.
488 */
489 static void
getIntelCacheEntryLineSize(unsigned long val,int * level,unsigned long * lineSize)490 getIntelCacheEntryLineSize(unsigned long val, int *level,
491 unsigned long *lineSize)
492 {
493 CacheType type;
494
495 type = CacheMap[val].type;
496 /* only interested in data caches */
497 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
498 * this data check has the side effect of rejecting that entry. If
499 * that wasn't the case, we could have to reject it explicitly */
500 if (CacheMap[val].lineSize == 0) {
501 return;
502 }
503 /* look at the caches, skip types we aren't interested in.
504 * if we already have a value for a lower level cache, skip the
505 * current entry */
506 if ((type == Cache_L1) || (type == Cache_L1d)) {
507 *level = 1;
508 *lineSize = CacheMap[val].lineSize;
509 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
510 *level = 2;
511 *lineSize = CacheMap[val].lineSize;
512 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
513 *level = 3;
514 *lineSize = CacheMap[val].lineSize;
515 }
516 return;
517 }
518
/*
 * Split one 32-bit register value into its four descriptor bytes and feed
 * each of them, most significant byte first, to
 * getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
528
/*
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor
 *
 * cpuidLevel is the highest standard cpuid command the processor accepts;
 * command 2 is required.
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    /* start above every level the descriptor lookup can record (1..3) so
     * that the first cache found is always accepted */
    int level = 4;
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all of 'al' (the same byte that is masked off
     * below before eax is treated as descriptors), so take all 8 bits */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* strip the count byte: it is not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next set of descriptors unless this was the last pass */
        if (count + 1 != repeat) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
576
/*
 * Cache line size lookup through the extended cpuid commands.
 * Returns '0' if the cache info is not supported by this processor.
 * This is based on the AMD extended cache commands for cpuid
 * (see "AMD Processor Recognition Application Note" Publication 20734);
 * some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The incoming cpuidLevel value is not consulted: it is replaced by the
 * extended cpuid level read from command 0x80000000.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* ask for the highest extended command the processor implements */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    /* low byte of ecx: line size of the L1 data cache */
    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff;
}
600
/*
 * CPU vendor identification strings as reported by cpuid command 0.
 * Each #define is the index of the string that follows it, so the values
 * must stay in step with the order of the entries.
 */
static const char *const manMap[] = {
#define INTEL 0
    "GenuineIntel",
#define AMD 1
    "AuthenticAMD",
#define CYRIX 2
    "CyrixInstead",
#define CENTAUR 3
    "CentaurHauls",
#define NEXGEN 4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE 6
    "RiseRiseRise",
#define UMC 7
    "UMC UMC UMC ",
#define SIS 8
    "SiS SiS SiS ", /* vendor string is spelled with a capital final 'S' */
#define NATIONAL 9
    "Geode by NSC",
};

static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);

/* one past the last valid manMap index, so it can never match an entry */
#define MAN_UNKNOWN 10
627
628 #if !defined(AMD_64)
/* bit 26 of edx as returned by cpuid command 1 */
#define SSE2_FLAG (1 << 26)

/*
 * Returns 1 when the processor reports the SSE2 feature bit, 0 otherwise
 * (including on 386/486 class processors and on processors whose highest
 * cpuid command is 0).
 */
unsigned long
s_mpi_is_sse2(void)
{
    unsigned long eax, ebx, ecx, edx;

    /* is386()/is486() must be ruled out before cpuid is issued */
    if (is386() || is486()) {
        return 0;
    }

    /* command 0 returns the highest supported command in eax; command 1
     * is needed to read the feature flags */
    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
    if (eax == 0) {
        /* has no SSE2 extensions */
        return 0;
    }

    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    return (edx & SSE2_FLAG) == SSE2_FLAG;
}
648 #endif
649
650 unsigned long
s_mpi_getProcessorLineSize()651 s_mpi_getProcessorLineSize()
652 {
653 unsigned long eax, ebx, ecx, edx;
654 PRUint32 cpuid[3];
655 unsigned long cpuidLevel;
656 unsigned long cacheLineSize = 0;
657 int manufacturer = MAN_UNKNOWN;
658 int i;
659 char string[13];
660
661 #if !defined(AMD_64)
662 if (is386()) {
663 return 0; /* 386 had no cache */
664 }
665 if (is486()) {
666 return 32; /* really? need more info */
667 }
668 #endif
669
670 /* Pentium, cpuid command is available */
671 freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
672 cpuidLevel = eax;
673 /* string holds the CPU's manufacturer ID string - a twelve
674 * character ASCII string stored in ebx, edx, ecx, and
675 * the 32-bit extended feature flags are in edx, ecx.
676 */
677 cpuid[0] = ebx;
678 cpuid[1] = ecx;
679 cpuid[2] = edx;
680 memcpy(string, cpuid, sizeof(cpuid));
681 string[12] = 0;
682
683 manufacturer = MAN_UNKNOWN;
684 for (i = 0; i < n_manufacturers; i++) {
685 if (strcmp(manMap[i], string) == 0) {
686 manufacturer = i;
687 }
688 }
689
690 if (manufacturer == INTEL) {
691 cacheLineSize = getIntelCacheLineSize(cpuidLevel);
692 } else {
693 cacheLineSize = getOtherCacheLineSize(cpuidLevel);
694 }
695 /* doesn't support cache info based on cpuid. This means
696 * an old pentium class processor, which have cache lines of
697 * 32. If we learn differently, we can use a switch based on
698 * the Manufacturer id */
699 if (cacheLineSize == 0) {
700 cacheLineSize = 32;
701 }
702 return cacheLineSize;
703 }
704 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
705 #endif
706
707 #if defined(__ppc64__)
708 /*
709 * Sigh, The PPC has some really nice features to help us determine cache
710 * size, since it had lots of direct control functions to do so. The POWER
711 * processor even has an instruction to do this, but it was dropped in
712 * PowerPC. Unfortunately most of them are not available in user mode.
713 *
714 * The dcbz function would be a great way to determine cache line size except
715 * 1) it only works on write-back memory (it throws an exception otherwise),
716 * and 2) because so many mac programs 'knew' the processor cache size was
717 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
718 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
719 * these programs happy. dcbzl work if 64 bit instructions are supported.
720 * If you know 64 bit instructions are supported, and that stack is
721 * write-back, you can use this code.
722 */
723 #include "memory.h"
724
725 /* clear the cache line that contains 'array' */
726 static inline void
dcbzl(char * array)727 dcbzl(char *array)
728 {
729 register char *a asm("r2") = array;
730 __asm__ __volatile__("dcbzl %0,r0"
731 : "=r"(a)
732 : "0"(a));
733 }
734
/* Round address x up to the next multiple of y (y must be a power of 2). */
#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))

/* Largest cache line size, in bytes, that the probe below can report. */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: measures the cache line size by filling an aligned
 * buffer with 0xff, clearing one cache block at its start with dcbzl(), and
 * finding how many bytes ended up zero. Returns that size in bytes, or 0 if
 * no byte was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize(void)
{
    /* twice the maximum (plus one) so that an aligned window of
     * PPC_MAX_LINE_SIZE bytes always fits inside the array */
    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
    char *probe;
    int size;

    /* align the window on a maximum line size boundary, so we
     * know the clear starts at its first address */
    probe = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
    /* set every byte of the window to all ones */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
    /* clear one cache block starting at 'probe' */
    dcbzl(probe);

    /* try each power-of-two size from the largest down: the first one
     * whose last byte reads back as zero is the block size */
    for (size = PPC_MAX_LINE_SIZE; size != 0; size = size / 2) {
        if (probe[size - 1] == 0) {
            return size;
        }
    }
    return 0;
}
761
762 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
763 #endif
764
765 /*
766 * put other processor and platform specific cache code here
767 * return the smallest cache line size in bytes on the processor
768 * (usually the L1 cache). If the OS has a call, this would be
 * a great place to put it.
770 *
771 * If there is no cache, return 0;
772 *
773 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
774 * below aren't compiled.
775 *
776 */
777
778 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
779 * OS */
780 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
781
782 unsigned long
s_mpi_getProcessorLineSize()783 s_mpi_getProcessorLineSize()
784 {
785 return MPI_CACHE_LINE_SIZE;
786 }
787 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
788 #endif
789
790 /* If no way to get the processor cache line size has been defined, assume
791 * it's 32 bytes (most common value, does not significantly impact performance)
792 */
793 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
/*
 * Generic fallback: no processor-specific detection is available, so
 * report a cache line size of 32 bytes.
 */
unsigned long
s_mpi_getProcessorLineSize(void)
{
    return 32;
}
799 #endif
800
801 #ifdef TEST_IT
802 #include <stdio.h>
803
/*
 * Stand-alone test driver (compiled only when TEST_IT is defined): prints
 * the value returned by s_mpi_getProcessorLineSize().
 */
int
main(void)
{
    /* the result is an unsigned long, so it has to be printed with %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
808 #endif
809