1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin.           */
3 /* All rights reserved.                                              */
4 /*                                                                   */
5 /* Redistribution and use in source and binary forms, with or        */
6 /* without modification, are permitted provided that the following   */
7 /* conditions are met:                                               */
8 /*                                                                   */
9 /*   1. Redistributions of source code must retain the above         */
10 /*      copyright notice, this list of conditions and the following  */
11 /*      disclaimer.                                                  */
12 /*                                                                   */
13 /*   2. Redistributions in binary form must reproduce the above      */
14 /*      copyright notice, this list of conditions and the following  */
15 /*      disclaimer in the documentation and/or other materials       */
16 /*      provided with the distribution.                              */
17 /*                                                                   */
18 /*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19 /*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20 /*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21 /*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22 /*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23 /*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24 /*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25 /*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26 /*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27 /*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28 /*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29 /*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30 /*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31 /*    POSSIBILITY OF SUCH DAMAGE.                                    */
32 /*                                                                   */
33 /* The views and conclusions contained in the software and           */
34 /* documentation are those of the authors and should not be          */
35 /* interpreted as representing official policies, either expressed   */
36 /* or implied, of The University of Texas at Austin.                 */
37 /*********************************************************************/
38 
39 #ifndef PARAM_H
40 #define PARAM_H
41 
#if defined(OPTERON)

/*
 * Parameter set selected when OPTERON is defined.
 *
 * ARCH_X86 changes four of the *_DEFAULT_UNROLL_M values and ALLOC_HUGETLB
 * changes every *_DEFAULT_Q value; all other values are fixed.
 */

#define HAVE_EXCLUSIVE_CACHE
#define SYMV_P                  16

#define SNUMOPT                 4
#define DNUMOPT                 2

#define GEMM_DEFAULT_ALIGN      0x01ffffUL
#define GEMM_DEFAULT_OFFSET_A   64
#define GEMM_DEFAULT_OFFSET_B   256

/* *_DEFAULT_UNROLL_M: the Q and X values are the same with or without ARCH_X86. */
#define QGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#else
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#endif

/* *_DEFAULT_UNROLL_N */
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1

/*
 * *_DEFAULT_P and *_DEFAULT_R expand to lowercase identifiers that are not
 * defined in this section.
 */
#define SGEMM_DEFAULT_P         sgemm_p
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_P         dgemm_p
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_P         qgemm_p
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_P         cgemm_p
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_P         zgemm_p
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_P         xgemm_p
#define XGEMM_DEFAULT_R         xgemm_r

/* *_DEFAULT_Q: a single value for all six prefixes, chosen by ALLOC_HUGETLB. */
#ifndef ALLOC_HUGETLB
#define SGEMM_DEFAULT_Q         240
#define DGEMM_DEFAULT_Q         240
#define QGEMM_DEFAULT_Q         240
#define CGEMM_DEFAULT_Q         240
#define ZGEMM_DEFAULT_Q         240
#define XGEMM_DEFAULT_Q         240
#else
#define SGEMM_DEFAULT_Q         248
#define DGEMM_DEFAULT_Q         248
#define QGEMM_DEFAULT_Q         248
#define CGEMM_DEFAULT_Q         248
#define ZGEMM_DEFAULT_Q         248
#define XGEMM_DEFAULT_Q         248
#endif

#endif
113 
#if defined(BARCELONA) || defined(SHANGHAI)

/*
 * Parameter set shared by BARCELONA and SHANGHAI builds.
 * Only the *_DEFAULT_UNROLL_M values depend on ARCH_X86.
 */

#define SNUMOPT		8
#define DNUMOPT		4

#define GEMM_DEFAULT_OFFSET_A  64
#define GEMM_DEFAULT_OFFSET_B 832
#define GEMM_DEFAULT_ALIGN 0x0fffUL

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#endif

/*
 * *_DEFAULT_P / *_DEFAULT_Q.
 *
 * A second table (P = 496/248/124/248/124/62 for S/D/Q/C/Z/X, Q = 248 for
 * all) used to sit here inside an `#if 0` group.  It could never be compiled,
 * so the dead group has been removed and only the live values remain.
 */
#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 224
#define QGEMM_DEFAULT_P 112
#define CGEMM_DEFAULT_P 224
#define ZGEMM_DEFAULT_P 112
#define XGEMM_DEFAULT_P  56

#define SGEMM_DEFAULT_Q 224
#define DGEMM_DEFAULT_Q 224
#define QGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224

/* *_DEFAULT_R expand to lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P	16
#define HAVE_EXCLUSIVE_CACHE

/* GEMM_THREAD expands to gemm_thread_mn, which is not defined in this section. */
#define GEMM_THREAD gemm_thread_mn

#endif
192 
#if defined(ATHLON)

/*
 * Parameter set selected when ATHLON is defined.  Nothing in this section
 * depends on any other configuration macro.  Each *_DEFAULT_R expands to a
 * lowercase identifier that is not defined in this section.
 */

#define HAVE_EXCLUSIVE_CACHE
#define SYMV_P                  16

#define SNUMOPT                 4
#define DNUMOPT                 2

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   384

/* SGEMM_* */
#define SGEMM_DEFAULT_UNROLL_M  2
#define SGEMM_DEFAULT_UNROLL_N  4
#define SGEMM_DEFAULT_P         208
#define SGEMM_DEFAULT_Q         208
#define SGEMM_DEFAULT_R         sgemm_r

/* DGEMM_* */
#define DGEMM_DEFAULT_UNROLL_M  1
#define DGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_P         104
#define DGEMM_DEFAULT_Q         208
#define DGEMM_DEFAULT_R         dgemm_r

/* QGEMM_* */
#define QGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_N  2
#define QGEMM_DEFAULT_P         56
#define QGEMM_DEFAULT_Q         208
#define QGEMM_DEFAULT_R         qgemm_r

/* CGEMM_* */
#define CGEMM_DEFAULT_UNROLL_M  1
#define CGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_P         104
#define CGEMM_DEFAULT_Q         208
#define CGEMM_DEFAULT_R         cgemm_r

/* ZGEMM_* */
#define ZGEMM_DEFAULT_UNROLL_M  1
#define ZGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_P         56
#define ZGEMM_DEFAULT_Q         208
#define ZGEMM_DEFAULT_R         zgemm_r

/* XGEMM_* */
#define XGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_N  1
#define XGEMM_DEFAULT_P         28
#define XGEMM_DEFAULT_Q         208
#define XGEMM_DEFAULT_R         xgemm_r

#endif
240 
#if defined(VIAC3)

/*
 * Parameter set selected when VIAC3 is defined.  No value here depends on
 * another configuration macro.  Each *_DEFAULT_R expands to a lowercase
 * identifier that is not defined in this section.
 */

#define SYMV_P                  16

#define SNUMOPT                 2
#define DNUMOPT                 1

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   256

/* SGEMM_* */
#define SGEMM_DEFAULT_UNROLL_M  2
#define SGEMM_DEFAULT_UNROLL_N  4
#define SGEMM_DEFAULT_P         128
#define SGEMM_DEFAULT_Q         512
#define SGEMM_DEFAULT_R         sgemm_r

/* DGEMM_* */
#define DGEMM_DEFAULT_UNROLL_M  1
#define DGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_P         128
#define DGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_R         dgemm_r

/* QGEMM_* */
#define QGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_N  2
#define QGEMM_DEFAULT_P         128
#define QGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_R         qgemm_r

/* CGEMM_* */
#define CGEMM_DEFAULT_UNROLL_M  1
#define CGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_P         128
#define CGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_R         cgemm_r

/* ZGEMM_* */
#define ZGEMM_DEFAULT_UNROLL_M  1
#define ZGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_P         128
#define ZGEMM_DEFAULT_Q         128
#define ZGEMM_DEFAULT_R         zgemm_r

/* XGEMM_* */
#define XGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_N  1
#define XGEMM_DEFAULT_P         128
#define XGEMM_DEFAULT_Q         128
#define XGEMM_DEFAULT_R         xgemm_r

#endif
287 
#if defined(NANO)

/*
 * Parameter set selected when NANO is defined.
 *
 * ARCH_X86 affects only four unroll values (S/C *_UNROLL_N and
 * D/Z *_UNROLL_M); the remaining unroll values are identical in both cases
 * and are therefore defined once, outside the conditional.
 */

#define HAVE_EXCLUSIVE_CACHE
#define SYMV_P                  16

#define SNUMOPT                 4
#define DNUMOPT                 2

#define GEMM_DEFAULT_ALIGN      0x01ffffUL
#define GEMM_DEFAULT_OFFSET_A   64
#define GEMM_DEFAULT_OFFSET_B   256

/* Unroll values that do not depend on ARCH_X86. */
#define SGEMM_DEFAULT_UNROLL_M  4
#define QGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1

#define DGEMM_DEFAULT_UNROLL_N  4
#define QGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1

/* Unroll values that do depend on ARCH_X86. */
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N  8
#define CGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#else
#define SGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_N  2
#define DGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#endif

/*
 * P / Q / R per prefix.  Each *_DEFAULT_R expands to a lowercase identifier
 * that is not defined in this section.
 */
#define SGEMM_DEFAULT_P         288
#define SGEMM_DEFAULT_Q         256
#define SGEMM_DEFAULT_R         sgemm_r

#define DGEMM_DEFAULT_P         288
#define DGEMM_DEFAULT_Q         128
#define DGEMM_DEFAULT_R         dgemm_r

#define QGEMM_DEFAULT_P         288
#define QGEMM_DEFAULT_Q         64
#define QGEMM_DEFAULT_R         qgemm_r

#define CGEMM_DEFAULT_P         288
#define CGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_R         cgemm_r

#define ZGEMM_DEFAULT_P         288
#define ZGEMM_DEFAULT_Q         64
#define ZGEMM_DEFAULT_R         zgemm_r

#define XGEMM_DEFAULT_P         288
#define XGEMM_DEFAULT_Q         32
#define XGEMM_DEFAULT_R         xgemm_r

#endif
352 
#if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)

/*
 * Parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3 builds.
 * HAVE_SSE is the only other macro consulted: it selects SNUMOPT and the
 * S/C *_DEFAULT_UNROLL_M values.
 */

#define SYMV_P                  4

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   0

/* Values selected by HAVE_SSE. */
#ifndef HAVE_SSE
#define SNUMOPT                 1
#define SGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_M  2
#else
#define SNUMOPT                 2
#define SGEMM_DEFAULT_UNROLL_M  8
#define CGEMM_DEFAULT_UNROLL_M  4
#endif
#define DNUMOPT                 1

/* Remaining *_DEFAULT_UNROLL_M values. */
#define DGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_M  1

/* *_DEFAULT_UNROLL_N */
#define SGEMM_DEFAULT_UNROLL_N  2
#define DGEMM_DEFAULT_UNROLL_N  2
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  1
#define ZGEMM_DEFAULT_UNROLL_N  1
#define XGEMM_DEFAULT_UNROLL_N  1

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q: 256 for every prefix. */
#define SGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         256
#define XGEMM_DEFAULT_Q         256

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
411 
#if defined(PENTIUMM)

/*
 * Parameter set selected when PENTIUMM is defined.
 *
 * CORE_YONAH changes six unroll values (S_M, S_N, D_N, C_M, C_N, Z_N); the
 * other six are identical either way and are defined unconditionally.
 */

#define SYMV_P                  4

#define SNUMOPT                 2
#define DNUMOPT                 1

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   0

/* Unroll values that do not depend on CORE_YONAH. */
#define DGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_N  1

/* Unroll values selected by CORE_YONAH. */
#ifndef CORE_YONAH
#define SGEMM_DEFAULT_UNROLL_M  8
#define SGEMM_DEFAULT_UNROLL_N  2
#define DGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_N  1
#define ZGEMM_DEFAULT_UNROLL_N  1
#else
#define SGEMM_DEFAULT_UNROLL_M  4
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#endif

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q: 256 for every prefix. */
#define SGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         256
#define XGEMM_DEFAULT_Q         256

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
476 
#if defined(CORE_NORTHWOOD)

/*
 * Parameter set selected when CORE_NORTHWOOD is defined.  No value depends
 * on another configuration macro.  *_DEFAULT_P and *_DEFAULT_R expand to
 * lowercase identifiers that are not defined in this section.
 */

#define SYMV_P                  8

#define SNUMOPT                 4
#define DNUMOPT                 2

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   32

/* SGEMM_* */
#define SGEMM_DEFAULT_UNROLL_M  8
#define SGEMM_DEFAULT_UNROLL_N  2
#define SGEMM_DEFAULT_P         sgemm_p
#define SGEMM_DEFAULT_Q         128
#define SGEMM_DEFAULT_R         sgemm_r

/* DGEMM_* */
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  2
#define DGEMM_DEFAULT_P         dgemm_p
#define DGEMM_DEFAULT_Q         128
#define DGEMM_DEFAULT_R         dgemm_r

/* QGEMM_* */
#define QGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_N  2
#define QGEMM_DEFAULT_P         qgemm_p
#define QGEMM_DEFAULT_Q         128
#define QGEMM_DEFAULT_R         qgemm_r

/* CGEMM_* */
#define CGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_N  1
#define CGEMM_DEFAULT_P         cgemm_p
#define CGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_R         cgemm_r

/* ZGEMM_* */
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  1
#define ZGEMM_DEFAULT_P         zgemm_p
#define ZGEMM_DEFAULT_Q         128
#define ZGEMM_DEFAULT_R         zgemm_r

/* XGEMM_* */
#define XGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_N  1
#define XGEMM_DEFAULT_P         xgemm_p
#define XGEMM_DEFAULT_Q         128
#define XGEMM_DEFAULT_R         xgemm_r

#endif
528 
#if defined(CORE_PRESCOTT)

/*
 * Parameter set selected when CORE_PRESCOTT is defined.
 *
 * __64BIT__ selects the two GEMM_DEFAULT_OFFSET_* values and ARCH_X86
 * selects four of the *_DEFAULT_UNROLL_M values.
 */

#define SYMV_P                  8

#define SNUMOPT                 4
#define DNUMOPT                 2

#define GEMM_DEFAULT_ALIGN      0x0ffffUL

#ifdef __64BIT__
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   256
#else
#define GEMM_DEFAULT_OFFSET_A   128
#define GEMM_DEFAULT_OFFSET_B   192
#endif

/* *_DEFAULT_UNROLL_M: Q and X do not depend on ARCH_X86. */
#define QGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#else
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#endif

/* *_DEFAULT_UNROLL_N */
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q: 128 for every prefix. */
#define SGEMM_DEFAULT_Q         128
#define DGEMM_DEFAULT_Q         128
#define QGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_Q         128
#define ZGEMM_DEFAULT_Q         128
#define XGEMM_DEFAULT_Q         128

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
594 
#if defined(CORE2)

/*
 * Parameter set selected when CORE2 is defined.
 *
 * Every *_DEFAULT_UNROLL_M value, and the Q/X *_DEFAULT_UNROLL_N values, are
 * the same with or without ARCH_X86.  ARCH_X86 selects the remaining four
 * *_DEFAULT_UNROLL_N values and is also the only case in which MASK() is
 * defined.
 */

#define SYMV_P                  8
#define SWITCH_RATIO            4

#define SNUMOPT                 8
#define DNUMOPT                 4

#define GEMM_DEFAULT_ALIGN      0x03fffUL
#define GEMM_DEFAULT_OFFSET_A   448
#define GEMM_DEFAULT_OFFSET_B   128

/* *_DEFAULT_UNROLL_M */
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define QGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1

/* *_DEFAULT_UNROLL_N */
#define QGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#else
#define SGEMM_DEFAULT_UNROLL_N  2
#define DGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  1
#define ZGEMM_DEFAULT_UNROLL_N  1

/* MASK(a, b): a rounded up to the next multiple of b (integer arithmetic). */
#define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
#endif

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q: 256 for every prefix. */
#define SGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         256
#define XGEMM_DEFAULT_Q         256

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
667 
#if defined(PENRYN)

/*
 * Parameter set selected when PENRYN is defined.
 *
 * Every *_DEFAULT_UNROLL_N value, and the Q/X *_DEFAULT_UNROLL_M values, are
 * the same with or without ARCH_X86; only the S/D/C/Z *_DEFAULT_UNROLL_M
 * values are selected by it.
 */

#define SYMV_P                  8
#define SWITCH_RATIO            4
#define GETRF_FACTOR            0.75

#define SNUMOPT                 8
#define DNUMOPT                 4

#define GEMM_DEFAULT_ALIGN      0x03fffUL
#define GEMM_DEFAULT_OFFSET_A   128
#define GEMM_DEFAULT_OFFSET_B   0

/* *_DEFAULT_UNROLL_M */
#define QGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#else
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#endif

/* *_DEFAULT_UNROLL_N */
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q */
#define SGEMM_DEFAULT_Q         512
#define DGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_Q         512
#define ZGEMM_DEFAULT_Q         256
#define XGEMM_DEFAULT_Q         128

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
738 
#if defined(DUNNINGTON)

/*
 * Parameter set selected when DUNNINGTON is defined.
 *
 * Every *_DEFAULT_UNROLL_N value, and the Q/X *_DEFAULT_UNROLL_M values, are
 * the same with or without ARCH_X86; only the S/D/C/Z *_DEFAULT_UNROLL_M
 * values are selected by it.
 */

#define SYMV_P                  8
#define SWITCH_RATIO            4
#define GETRF_FACTOR            0.75

/* GEMM_THREAD expands to gemm_thread_mn, which is not defined in this section. */
#define GEMM_THREAD             gemm_thread_mn

#define SNUMOPT                 8
#define DNUMOPT                 4

#define GEMM_DEFAULT_ALIGN      0x03fffUL
#define GEMM_DEFAULT_OFFSET_A   128
#define GEMM_DEFAULT_OFFSET_B   0

/* *_DEFAULT_UNROLL_M */
#define QGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#else
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#endif

/* *_DEFAULT_UNROLL_N */
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q */
#define SGEMM_DEFAULT_Q         768
#define DGEMM_DEFAULT_Q         384
#define QGEMM_DEFAULT_Q         192
#define CGEMM_DEFAULT_Q         768
#define ZGEMM_DEFAULT_Q         384
#define XGEMM_DEFAULT_Q         192

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
810 
#if defined(NEHALEM)

/*
 * Parameter set selected when NEHALEM is defined.
 *
 * Every *_DEFAULT_UNROLL_M value, and the Q/X *_DEFAULT_UNROLL_N values, are
 * the same with or without ARCH_X86; only the S/D/C/Z *_DEFAULT_UNROLL_N
 * values are selected by it.
 */

#define SYMV_P                  8
#define SWITCH_RATIO            4
#define GETRF_FACTOR            0.72

#define SNUMOPT                 8
#define DNUMOPT                 4

#define GEMM_DEFAULT_ALIGN      0x03fffUL
#define GEMM_DEFAULT_OFFSET_A   32
#define GEMM_DEFAULT_OFFSET_B   0

/* *_DEFAULT_UNROLL_M */
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_M  1

/* *_DEFAULT_UNROLL_N */
#define QGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N  8
#define DGEMM_DEFAULT_UNROLL_N  8
#define CGEMM_DEFAULT_UNROLL_N  4
#define ZGEMM_DEFAULT_UNROLL_N  4
#else
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#endif

/* *_DEFAULT_P */
#define SGEMM_DEFAULT_P         504
#define DGEMM_DEFAULT_P         504
#define QGEMM_DEFAULT_P         504
#define CGEMM_DEFAULT_P         252
#define ZGEMM_DEFAULT_P         252
#define XGEMM_DEFAULT_P         252

/* *_DEFAULT_Q */
#define SGEMM_DEFAULT_Q         512
#define DGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_Q         512
#define ZGEMM_DEFAULT_Q         256
#define XGEMM_DEFAULT_Q         128

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
882 
883 
#if defined(ATOM)

/*
 * Parameter set selected when ATOM is defined.  ARCH_X86 selects the
 * S/D/C/Z *_DEFAULT_UNROLL_M values; everything else is fixed.
 */

#define SYMV_P                  8

#define SNUMOPT                 2
#define DNUMOPT                 1

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   64
#define GEMM_DEFAULT_OFFSET_B   0

/* *_DEFAULT_UNROLL_M: Q and X do not depend on ARCH_X86. */
#define QGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#else
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#endif

/* *_DEFAULT_UNROLL_N */
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  2
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  1
#define XGEMM_DEFAULT_UNROLL_N  1

/* *_DEFAULT_P: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

/* *_DEFAULT_Q: 256 for every prefix. */
#define SGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_Q         256
#define QGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         256
#define XGEMM_DEFAULT_Q         256

/* *_DEFAULT_R: lowercase identifiers not defined in this section. */
#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#endif
944 
945 
#if defined(ITANIUM2)

/*
 * Parameter set selected when ITANIUM2 is defined.  No value depends on
 * another configuration macro.  *_DEFAULT_P and *_DEFAULT_R expand to
 * lowercase identifiers that are not defined in this section.
 */

#define SYMV_P                  16
#define GETRF_FACTOR            0.65

#define SNUMOPT                 4
#define DNUMOPT                 4

#define GEMM_DEFAULT_ALIGN      0x03fffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   128

/* SGEMM_* */
#define SGEMM_DEFAULT_UNROLL_M  8
#define SGEMM_DEFAULT_UNROLL_N  8
#define SGEMM_DEFAULT_P         sgemm_p
#define SGEMM_DEFAULT_Q         1024
#define SGEMM_DEFAULT_R         sgemm_r

/* DGEMM_* */
#define DGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_N  8
#define DGEMM_DEFAULT_P         dgemm_p
#define DGEMM_DEFAULT_Q         1024
#define DGEMM_DEFAULT_R         dgemm_r

/* QGEMM_* */
#define QGEMM_DEFAULT_UNROLL_M  8
#define QGEMM_DEFAULT_UNROLL_N  8
#define QGEMM_DEFAULT_P         qgemm_p
#define QGEMM_DEFAULT_Q         1024
#define QGEMM_DEFAULT_R         qgemm_r

/* CGEMM_* */
#define CGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_P         cgemm_p
#define CGEMM_DEFAULT_Q         1024
#define CGEMM_DEFAULT_R         cgemm_r

/* ZGEMM_* */
#define ZGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_N  4
#define ZGEMM_DEFAULT_P         zgemm_p
#define ZGEMM_DEFAULT_Q         1024
#define ZGEMM_DEFAULT_R         zgemm_r

/* XGEMM_* */
#define XGEMM_DEFAULT_UNROLL_M  4
#define XGEMM_DEFAULT_UNROLL_N  4
#define XGEMM_DEFAULT_P         xgemm_p
#define XGEMM_DEFAULT_Q         1024
#define XGEMM_DEFAULT_R         xgemm_r

#endif
994 
#if defined(EV4) || defined(EV5) || defined(EV6)

/*
 * Parameter set shared by EV4, EV5 and EV6 builds.
 *
 * Only S/D/C/Z-prefixed values are defined here (no Q or X ones).  The
 * P/Q values are chosen per CPU macro; *_DEFAULT_R is defined in this
 * section for EV4 only.
 */

#define SYMV_P                  8

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   512
#define GEMM_DEFAULT_OFFSET_B   512

/* Unroll values common to all three CPUs. */
#define SGEMM_DEFAULT_UNROLL_M  4
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  2

/* EV4: its own SNUMOPT/DNUMOPT plus a full P/Q/R table. */
#if defined(EV4)
#define SNUMOPT                 1
#define DNUMOPT                 1

#define SGEMM_DEFAULT_P         32
#define SGEMM_DEFAULT_Q         112
#define SGEMM_DEFAULT_R         256

#define DGEMM_DEFAULT_P         32
#define DGEMM_DEFAULT_Q         56
#define DGEMM_DEFAULT_R         256

#define CGEMM_DEFAULT_P         32
#define CGEMM_DEFAULT_Q         64
#define CGEMM_DEFAULT_R         240

#define ZGEMM_DEFAULT_P         32
#define ZGEMM_DEFAULT_Q         32
#define ZGEMM_DEFAULT_R         240
#else
#define SNUMOPT                 2
#define DNUMOPT                 2
#endif

/* EV5: P/Q table. */
#if defined(EV5)
#define SGEMM_DEFAULT_P         64
#define DGEMM_DEFAULT_P         64
#define CGEMM_DEFAULT_P         64
#define ZGEMM_DEFAULT_P         64

#define SGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_Q         128
#define ZGEMM_DEFAULT_Q         64
#endif

/* EV6: P/Q table. */
#if defined(EV6)
#define SGEMM_DEFAULT_P         256
#define DGEMM_DEFAULT_P         256
#define CGEMM_DEFAULT_P         256
#define ZGEMM_DEFAULT_P         128

#define SGEMM_DEFAULT_Q         512
#define DGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         256
#endif

#endif
1067 
#if defined(CELL)

/*
 * Parameter set selected when CELL is defined.  Only S/D/C/Z-prefixed values
 * are defined, and no *_DEFAULT_R is defined in this section.
 */

#define SYMV_P                  4

#define SNUMOPT                 2
#define DNUMOPT                 2

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   8192

/* SGEMM_* */
#define SGEMM_DEFAULT_UNROLL_M  16
#define SGEMM_DEFAULT_UNROLL_N  4
#define SGEMM_DEFAULT_P         128
#define SGEMM_DEFAULT_Q         512

/* DGEMM_* */
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_P         128
#define DGEMM_DEFAULT_Q         256

/* CGEMM_* */
#define CGEMM_DEFAULT_UNROLL_M  8
#define CGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_P         128
#define CGEMM_DEFAULT_Q         256

/* ZGEMM_* */
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_P         128
#define ZGEMM_DEFAULT_Q         128

#endif
1098 
#if defined(PPCG4)

/*
 * Parameter set selected when PPCG4 is defined.  Only S/D/C/Z-prefixed values
 * are defined, and no *_DEFAULT_R is defined in this section.
 *
 * NOTE(review): unlike the neighbouring sections, this one defines neither
 * SNUMOPT nor DNUMOPT -- confirm whether that is intentional.
 */

#define SYMV_P                  4

#define GEMM_DEFAULT_ALIGN      0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   1024

/* SGEMM_* */
#define SGEMM_DEFAULT_UNROLL_M  16
#define SGEMM_DEFAULT_UNROLL_N  4
#define SGEMM_DEFAULT_P         256
#define SGEMM_DEFAULT_Q         256

/* DGEMM_* */
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_P         128
#define DGEMM_DEFAULT_Q         256

/* CGEMM_* */
#define CGEMM_DEFAULT_UNROLL_M  8
#define CGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_P         128
#define CGEMM_DEFAULT_Q         256

/* ZGEMM_* */
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_P         64
#define ZGEMM_DEFAULT_Q         256

#endif
1125 
#ifdef PPC970

/*
 * Parameter set selected when PPC970 is defined.  Only S/D/C/Z-prefixed
 * values are defined, and no *_DEFAULT_R is defined in this section.
 */

#define SNUMOPT		4
#define DNUMOPT		4

#define GEMM_DEFAULT_OFFSET_A 2688
#define GEMM_DEFAULT_OFFSET_B 3072
#define GEMM_DEFAULT_ALIGN 0x03fffUL

#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2

/*
 * *_DEFAULT_P.
 *
 * OS_LINUX builds pick the larger table when L2_SIZE is exactly 1024976 and
 * 176 otherwise (unchanged).  Builds without OS_LINUX previously received no
 * *_DEFAULT_P from this section at all; they now get the same 176 fallback so
 * that the section always yields a complete parameter set.
 *
 * NOTE(review): 1024976 is not 1 MiB (1048576).  It is left untouched because
 * it presumably mirrors the L2_SIZE value emitted by the build configuration
 * -- confirm against whatever defines L2_SIZE before changing it.
 */
#ifdef OS_LINUX
#if L2_SIZE == 1024976
#define SGEMM_DEFAULT_P 320
#define DGEMM_DEFAULT_P 256
#define CGEMM_DEFAULT_P 256
#define ZGEMM_DEFAULT_P 256
#else
#define SGEMM_DEFAULT_P 176
#define DGEMM_DEFAULT_P 176
#define CGEMM_DEFAULT_P 176
#define ZGEMM_DEFAULT_P 176
#endif
#else
#define SGEMM_DEFAULT_P 176
#define DGEMM_DEFAULT_P 176
#define CGEMM_DEFAULT_P 176
#define ZGEMM_DEFAULT_P 176
#endif

#define SGEMM_DEFAULT_Q 512
#define DGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 128

#define SYMV_P	 4

#endif
1166 
/* PPC440: tuning defaults compiled in only when PPC440 is defined. */
#if defined(PPC440)

#define SNUMOPT 2
#define DNUMOPT 2

/* GEMM_DEFAULT_* settings that are not tied to one GEMM family. */
#define GEMM_DEFAULT_ALIGN    0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
#define GEMM_DEFAULT_OFFSET_B (32 * 0)

/* In every family below the R default is an alias of the P default. */

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P        512
#define SGEMM_DEFAULT_Q        1024
#define SGEMM_DEFAULT_R        SGEMM_DEFAULT_P

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_P        512
#define DGEMM_DEFAULT_Q        512
#define DGEMM_DEFAULT_R        DGEMM_DEFAULT_P

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_P        512
#define CGEMM_DEFAULT_Q        512
#define CGEMM_DEFAULT_R        CGEMM_DEFAULT_P

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_P        512
#define ZGEMM_DEFAULT_Q        256
#define ZGEMM_DEFAULT_R        ZGEMM_DEFAULT_P

#define SYMV_P 4

#endif /* PPC440 */
1202 
/* PPC440FP2: tuning defaults compiled in only when PPC440FP2 is defined. */
#if defined(PPC440FP2)

#define SNUMOPT 4
#define DNUMOPT 4

/* GEMM_DEFAULT_* settings that are not tied to one GEMM family. */
#define GEMM_DEFAULT_ALIGN    0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
#define GEMM_DEFAULT_OFFSET_B (32 * 0)

/*
 * Q defaults below are the large set (4096 / 3072 / 2048 / 1024).  A smaller
 * alternative (512 / 256 / 256 / 128 for S / D / C / Z) used to live in a
 * permanently disabled "#else" branch and was never compiled.
 */

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P        128
#define SGEMM_DEFAULT_Q        4096

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_P        128
#define DGEMM_DEFAULT_Q        3072

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 4
#define CGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_P        128
#define CGEMM_DEFAULT_Q        2048

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_P        128
#define ZGEMM_DEFAULT_Q        1024

#define SYMV_P 4

#endif /* PPC440FP2 */
1239 
1240 
1241 
/*
 * POWER3 / POWER4 / POWER5: tuning defaults compiled in when any of the three
 * macros is defined.  Offsets, alignment mask, unroll values and SYMV_P are
 * common; the P / Q / R defaults are chosen per CPU further down.
 */
#if defined(POWER5) || defined(POWER4) || defined(POWER3)

/* Settings common to all three CPUs. */
#define GEMM_DEFAULT_ALIGN    0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 2048

#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4

#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4

#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2

#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2

/*
 * POWER3: the only one of the three that sets SNUMOPT/DNUMOPT and the *_R
 * defaults in this section.
 * NOTE(review): no CGEMM_DEFAULT_P/Q/R is defined in this branch -- confirm
 * whether that omission is intentional.
 */
#if defined(POWER3)

#define SNUMOPT 4
#define DNUMOPT 4

#define SGEMM_DEFAULT_R 1012
#define SGEMM_DEFAULT_Q 432
#define SGEMM_DEFAULT_P 256

#define DGEMM_DEFAULT_R 1012
#define DGEMM_DEFAULT_Q 216
#define DGEMM_DEFAULT_P 256

#define ZGEMM_DEFAULT_R 1012
#define ZGEMM_DEFAULT_Q 104
#define ZGEMM_DEFAULT_P 256

#endif /* POWER3 */

/*
 * POWER4: P defaults only; the value depends on ALLOC_HUGETLB.
 * NOTE(review): no *_DEFAULT_Q is defined for POWER4 in this section --
 * confirm that it is supplied elsewhere.
 */
#ifdef POWER4
#ifndef ALLOC_HUGETLB
#define SGEMM_DEFAULT_P 144
#define DGEMM_DEFAULT_P 144
#define CGEMM_DEFAULT_P 144
#define ZGEMM_DEFAULT_P 144
#else
#define SGEMM_DEFAULT_P 184
#define DGEMM_DEFAULT_P 184
#define CGEMM_DEFAULT_P 184
#define ZGEMM_DEFAULT_P 184
#endif /* ALLOC_HUGETLB */
#endif /* POWER4 */

/* POWER5: P defaults depend on ALLOC_HUGETLB; Q is 256 for every family. */
#ifdef POWER5
#ifndef ALLOC_HUGETLB
#define SGEMM_DEFAULT_P 320
#define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P 160
#define ZGEMM_DEFAULT_P 80
#else
#define SGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_P 256
#define CGEMM_DEFAULT_P 256
#define ZGEMM_DEFAULT_P 128
#endif /* ALLOC_HUGETLB */

#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 256
#endif /* POWER5 */

#define SYMV_P 8

#endif /* POWER3 || POWER4 || POWER5 */
1310 
/* POWER6: tuning defaults compiled in only when POWER6 is defined. */
#ifdef POWER6

#define SNUMOPT 4
#define DNUMOPT 4

/* GEMM_DEFAULT_* settings that are not tied to one GEMM family. */
#define GEMM_DEFAULT_ALIGN    0x03fffUL
#define GEMM_DEFAULT_OFFSET_A 384
#define GEMM_DEFAULT_OFFSET_B 1024

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P        992
#define SGEMM_DEFAULT_Q        504

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_P        480
#define DGEMM_DEFAULT_Q        504

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_P        488
#define CGEMM_DEFAULT_Q        400

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_P        248
#define ZGEMM_DEFAULT_Q        400

#define SYMV_P 8

#endif /* POWER6 */
1342 
/*
 * SPARC V7: tuning defaults compiled in only when both SPARC and V7 are
 * defined.  This section also maps GEMM_THREAD to gemm_thread_mn.
 */
#if defined(V7) && defined(SPARC)

#define SNUMOPT 4
#define DNUMOPT 4

/* GEMM_DEFAULT_* settings that are not tied to one GEMM family. */
#define GEMM_DEFAULT_ALIGN    0x03fffUL
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 2048

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_P        256
#define SGEMM_DEFAULT_Q        512

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_P        256
#define DGEMM_DEFAULT_Q        256

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_P        256
#define CGEMM_DEFAULT_Q        256

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_P        256
#define ZGEMM_DEFAULT_Q        128

#define GEMM_THREAD gemm_thread_mn
#define SYMV_P      8

#endif /* SPARC && V7 */
1374 
/*
 * SPARC V9: tuning defaults compiled in only when both SPARC and V9 are
 * defined.
 */
#if defined(V9) && defined(SPARC)

#define SNUMOPT 2
#define DNUMOPT 2

/* GEMM_DEFAULT_* settings that are not tied to one GEMM family. */
#define GEMM_DEFAULT_ALIGN    0x03fffUL
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 2048

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P        512
#define SGEMM_DEFAULT_Q        1024

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_P        512
#define DGEMM_DEFAULT_Q        512

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_P        512
#define CGEMM_DEFAULT_Q        512

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_P        512
#define ZGEMM_DEFAULT_Q        256

#define SYMV_P 8

#endif /* SPARC && V9 */
1405 
/* SICORTEX: tuning defaults compiled in only when SICORTEX is defined. */
#if defined(SICORTEX)

#define SNUMOPT 2
#define DNUMOPT 2

/* GEMM_DEFAULT_* settings that are not tied to one GEMM family. */
#define GEMM_DEFAULT_ALIGN    0x03fffUL
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_P        108
#define SGEMM_DEFAULT_Q        288
#define SGEMM_DEFAULT_R        2000

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_P        112
#define DGEMM_DEFAULT_Q        144
#define DGEMM_DEFAULT_R        2000

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_P        108
#define CGEMM_DEFAULT_Q        144
#define CGEMM_DEFAULT_R        2000

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_P        112
#define ZGEMM_DEFAULT_Q        72
#define ZGEMM_DEFAULT_R        2000

#define SYMV_P 16

#endif /* SICORTEX */
1441 
/*
 * GENERIC: settings compiled in only when GENERIC is defined.
 * Unlike the CPU-specific sections visible above, this one defines the
 * *GEMM_P / *GEMM_R / *GEMM_Q macros directly: P and R expand to identifiers
 * (sgemm_p, sgemm_r, ... -- presumably objects declared elsewhere; confirm),
 * while Q is the constant 128 for every family.
 */
#if defined(GENERIC)

#define SNUMOPT 2
#define DNUMOPT 2

#define GEMM_DEFAULT_ALIGN    0x0ffffUL
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0

/* UNROLL_N does not depend on the architecture. */
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

/* QGEMM and XGEMM use the same UNROLL_M with or without ARCH_X86. */
#define QGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1

/* The remaining UNROLL_M values are halved when ARCH_X86 is defined. */
#ifndef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#else
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#endif /* ARCH_X86 */

/* Per-family P / Q / R. */
#define SGEMM_P sgemm_p
#define SGEMM_Q 128
#define SGEMM_R sgemm_r

#define DGEMM_P dgemm_p
#define DGEMM_Q 128
#define DGEMM_R dgemm_r

#define QGEMM_P qgemm_p
#define QGEMM_Q 128
#define QGEMM_R qgemm_r

#define CGEMM_P cgemm_p
#define CGEMM_Q 128
#define CGEMM_R cgemm_r

#define ZGEMM_P zgemm_p
#define ZGEMM_Q 128
#define ZGEMM_R zgemm_r

#define XGEMM_P xgemm_p
#define XGEMM_Q 128
#define XGEMM_R xgemm_r

#define SYMV_P 16

#endif /* GENERIC */
1498 
/*
 * Fallback unroll values for the QGEMM and XGEMM families: each one is set
 * to 2 unless an earlier section already defined it.
 */
#if !defined(QGEMM_DEFAULT_UNROLL_M)
#define QGEMM_DEFAULT_UNROLL_M 2
#endif
#if !defined(QGEMM_DEFAULT_UNROLL_N)
#define QGEMM_DEFAULT_UNROLL_N 2
#endif

#if !defined(XGEMM_DEFAULT_UNROLL_M)
#define XGEMM_DEFAULT_UNROLL_M 2
#endif
#if !defined(XGEMM_DEFAULT_UNROLL_N)
#define XGEMM_DEFAULT_UNROLL_N 2
#endif
1514 
/*
 * SHUFPD_n / SHUFPS_39: instruction-prefix macros (mnemonic, immediate and a
 * trailing comma; the user appends the operands).
 *
 * With HAVE_SSE2, SHUFPD_n expands to "shufpd $n," unless it was already
 * defined.  Without HAVE_SSE2, the four SHUFPD_n macros are defined
 * unconditionally as shufps with the immediates 0x44, 0x4e, 0xe4 and 0xee.
 */
#ifdef HAVE_SSE2

#ifndef SHUFPD_0
#define SHUFPD_0	shufpd	$0,
#endif
#ifndef SHUFPD_1
#define SHUFPD_1	shufpd	$1,
#endif
#ifndef SHUFPD_2
#define SHUFPD_2	shufpd	$2,
#endif
#ifndef SHUFPD_3
#define SHUFPD_3	shufpd	$3,
#endif

#else /* !HAVE_SSE2 */

#define SHUFPD_0	shufps	$0x44,
#define SHUFPD_1	shufps	$0x4e,
#define SHUFPD_2	shufps	$0xe4,
#define SHUFPD_3	shufps	$0xee,

#endif /* HAVE_SSE2 */

/* SHUFPS_39 is independent of HAVE_SSE2. */
#if !defined(SHUFPS_39)
#define SHUFPS_39	shufps	$0x39,
#endif
1541 
1542 
1543 #endif
1544