1 /*********************************************************************/ 2 /* Copyright 2009, 2010 The University of Texas at Austin. */ 3 /* All rights reserved. */ 4 /* */ 5 /* Redistribution and use in source and binary forms, with or */ 6 /* without modification, are permitted provided that the following */ 7 /* conditions are met: */ 8 /* */ 9 /* 1. Redistributions of source code must retain the above */ 10 /* copyright notice, this list of conditions and the following */ 11 /* disclaimer. */ 12 /* */ 13 /* 2. Redistributions in binary form must reproduce the above */ 14 /* copyright notice, this list of conditions and the following */ 15 /* disclaimer in the documentation and/or other materials */ 16 /* provided with the distribution. */ 17 /* */ 18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ 19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ 23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ 24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ 25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ 26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ 27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ 28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ 29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ 30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 31 /* POSSIBILITY OF SUCH DAMAGE. */ 32 /* */ 33 /* The views and conclusions contained in the software and */ 34 /* documentation are those of the authors and should not be */ 35 /* interpreted as representing official policies, either expressed */ 36 /* or implied, of The University of Texas at Austin. 
*/ 37 /*********************************************************************/ 38 39 #ifndef PARAM_H 40 #define PARAM_H 41 42 #ifdef OPTERON 43 44 #define SNUMOPT 4 45 #define DNUMOPT 2 46 47 #define GEMM_DEFAULT_OFFSET_A 64 48 #define GEMM_DEFAULT_OFFSET_B 256 49 #define GEMM_DEFAULT_ALIGN 0x01ffffUL 50 51 #define SGEMM_DEFAULT_UNROLL_N 4 52 #define DGEMM_DEFAULT_UNROLL_N 4 53 #define QGEMM_DEFAULT_UNROLL_N 2 54 #define CGEMM_DEFAULT_UNROLL_N 2 55 #define ZGEMM_DEFAULT_UNROLL_N 2 56 #define XGEMM_DEFAULT_UNROLL_N 1 57 58 #ifdef ARCH_X86 59 #define SGEMM_DEFAULT_UNROLL_M 4 60 #define DGEMM_DEFAULT_UNROLL_M 2 61 #define QGEMM_DEFAULT_UNROLL_M 2 62 #define CGEMM_DEFAULT_UNROLL_M 2 63 #define ZGEMM_DEFAULT_UNROLL_M 1 64 #define XGEMM_DEFAULT_UNROLL_M 1 65 #else 66 #define SGEMM_DEFAULT_UNROLL_M 8 67 #define DGEMM_DEFAULT_UNROLL_M 4 68 #define QGEMM_DEFAULT_UNROLL_M 2 69 #define CGEMM_DEFAULT_UNROLL_M 4 70 #define ZGEMM_DEFAULT_UNROLL_M 2 71 #define XGEMM_DEFAULT_UNROLL_M 1 72 #endif 73 74 #define SGEMM_DEFAULT_P sgemm_p 75 #define DGEMM_DEFAULT_P dgemm_p 76 #define QGEMM_DEFAULT_P qgemm_p 77 #define CGEMM_DEFAULT_P cgemm_p 78 #define ZGEMM_DEFAULT_P zgemm_p 79 #define XGEMM_DEFAULT_P xgemm_p 80 81 #define SGEMM_DEFAULT_R sgemm_r 82 #define DGEMM_DEFAULT_R dgemm_r 83 #define QGEMM_DEFAULT_R qgemm_r 84 #define CGEMM_DEFAULT_R cgemm_r 85 #define ZGEMM_DEFAULT_R zgemm_r 86 #define XGEMM_DEFAULT_R xgemm_r 87 88 #ifdef ALLOC_HUGETLB 89 90 #define SGEMM_DEFAULT_Q 248 91 #define DGEMM_DEFAULT_Q 248 92 #define QGEMM_DEFAULT_Q 248 93 #define CGEMM_DEFAULT_Q 248 94 #define ZGEMM_DEFAULT_Q 248 95 #define XGEMM_DEFAULT_Q 248 96 97 #else 98 99 #define SGEMM_DEFAULT_Q 240 100 #define DGEMM_DEFAULT_Q 240 101 #define QGEMM_DEFAULT_Q 240 102 #define CGEMM_DEFAULT_Q 240 103 #define ZGEMM_DEFAULT_Q 240 104 #define XGEMM_DEFAULT_Q 240 105 106 #endif 107 108 109 #define SYMV_P 16 110 #define HAVE_EXCLUSIVE_CACHE 111 112 #endif 113 114 #if defined(BARCELONA) || defined(SHANGHAI) 115 116 
#define SNUMOPT 8 117 #define DNUMOPT 4 118 119 #define GEMM_DEFAULT_OFFSET_A 64 120 #define GEMM_DEFAULT_OFFSET_B 832 121 #define GEMM_DEFAULT_ALIGN 0x0fffUL 122 123 #define SGEMM_DEFAULT_UNROLL_N 4 124 #define DGEMM_DEFAULT_UNROLL_N 4 125 #define QGEMM_DEFAULT_UNROLL_N 2 126 #define CGEMM_DEFAULT_UNROLL_N 2 127 #define ZGEMM_DEFAULT_UNROLL_N 2 128 #define XGEMM_DEFAULT_UNROLL_N 1 129 130 #ifdef ARCH_X86 131 #define SGEMM_DEFAULT_UNROLL_M 4 132 #define DGEMM_DEFAULT_UNROLL_M 2 133 #define QGEMM_DEFAULT_UNROLL_M 2 134 #define CGEMM_DEFAULT_UNROLL_M 2 135 #define ZGEMM_DEFAULT_UNROLL_M 1 136 #define XGEMM_DEFAULT_UNROLL_M 1 137 #else 138 #define SGEMM_DEFAULT_UNROLL_M 8 139 #define DGEMM_DEFAULT_UNROLL_M 4 140 #define QGEMM_DEFAULT_UNROLL_M 2 141 #define CGEMM_DEFAULT_UNROLL_M 4 142 #define ZGEMM_DEFAULT_UNROLL_M 2 143 #define XGEMM_DEFAULT_UNROLL_M 1 144 #endif 145 146 #if 0 147 #define SGEMM_DEFAULT_P 496 148 #define DGEMM_DEFAULT_P 248 149 #define QGEMM_DEFAULT_P 124 150 #define CGEMM_DEFAULT_P 248 151 #define ZGEMM_DEFAULT_P 124 152 #define XGEMM_DEFAULT_P 62 153 154 #define SGEMM_DEFAULT_Q 248 155 #define DGEMM_DEFAULT_Q 248 156 #define QGEMM_DEFAULT_Q 248 157 #define CGEMM_DEFAULT_Q 248 158 #define ZGEMM_DEFAULT_Q 248 159 #define XGEMM_DEFAULT_Q 248 160 161 #else 162 163 #define SGEMM_DEFAULT_P 448 164 #define DGEMM_DEFAULT_P 224 165 #define QGEMM_DEFAULT_P 112 166 #define CGEMM_DEFAULT_P 224 167 #define ZGEMM_DEFAULT_P 112 168 #define XGEMM_DEFAULT_P 56 169 170 #define SGEMM_DEFAULT_Q 224 171 #define DGEMM_DEFAULT_Q 224 172 #define QGEMM_DEFAULT_Q 224 173 #define CGEMM_DEFAULT_Q 224 174 #define ZGEMM_DEFAULT_Q 224 175 #define XGEMM_DEFAULT_Q 224 176 177 #endif 178 179 #define SGEMM_DEFAULT_R sgemm_r 180 #define QGEMM_DEFAULT_R qgemm_r 181 #define DGEMM_DEFAULT_R dgemm_r 182 #define CGEMM_DEFAULT_R cgemm_r 183 #define ZGEMM_DEFAULT_R zgemm_r 184 #define XGEMM_DEFAULT_R xgemm_r 185 186 #define SYMV_P 16 187 #define HAVE_EXCLUSIVE_CACHE 188 189 #define 
GEMM_THREAD gemm_thread_mn 190 191 #endif 192 193 #ifdef ATHLON 194 195 #define SNUMOPT 4 196 #define DNUMOPT 2 197 198 #define GEMM_DEFAULT_OFFSET_A 0 199 #define GEMM_DEFAULT_OFFSET_B 384 200 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 201 202 #define SGEMM_DEFAULT_UNROLL_N 4 203 #define DGEMM_DEFAULT_UNROLL_N 4 204 #define QGEMM_DEFAULT_UNROLL_N 2 205 #define CGEMM_DEFAULT_UNROLL_N 2 206 #define ZGEMM_DEFAULT_UNROLL_N 2 207 #define XGEMM_DEFAULT_UNROLL_N 1 208 209 #define SGEMM_DEFAULT_UNROLL_M 2 210 #define DGEMM_DEFAULT_UNROLL_M 1 211 #define QGEMM_DEFAULT_UNROLL_M 2 212 #define CGEMM_DEFAULT_UNROLL_M 1 213 #define ZGEMM_DEFAULT_UNROLL_M 1 214 #define XGEMM_DEFAULT_UNROLL_M 1 215 216 #define SGEMM_DEFAULT_R sgemm_r 217 #define DGEMM_DEFAULT_R dgemm_r 218 #define QGEMM_DEFAULT_R qgemm_r 219 #define CGEMM_DEFAULT_R cgemm_r 220 #define ZGEMM_DEFAULT_R zgemm_r 221 #define XGEMM_DEFAULT_R xgemm_r 222 223 #define SGEMM_DEFAULT_P 208 224 #define DGEMM_DEFAULT_P 104 225 #define QGEMM_DEFAULT_P 56 226 #define CGEMM_DEFAULT_P 104 227 #define ZGEMM_DEFAULT_P 56 228 #define XGEMM_DEFAULT_P 28 229 230 #define SGEMM_DEFAULT_Q 208 231 #define DGEMM_DEFAULT_Q 208 232 #define QGEMM_DEFAULT_Q 208 233 #define CGEMM_DEFAULT_Q 208 234 #define ZGEMM_DEFAULT_Q 208 235 #define XGEMM_DEFAULT_Q 208 236 237 #define SYMV_P 16 238 #define HAVE_EXCLUSIVE_CACHE 239 #endif 240 241 #ifdef VIAC3 242 243 #define SNUMOPT 2 244 #define DNUMOPT 1 245 246 #define GEMM_DEFAULT_OFFSET_A 0 247 #define GEMM_DEFAULT_OFFSET_B 256 248 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 249 250 #define SGEMM_DEFAULT_UNROLL_N 4 251 #define DGEMM_DEFAULT_UNROLL_N 4 252 #define QGEMM_DEFAULT_UNROLL_N 2 253 #define CGEMM_DEFAULT_UNROLL_N 2 254 #define ZGEMM_DEFAULT_UNROLL_N 2 255 #define XGEMM_DEFAULT_UNROLL_N 1 256 257 #define SGEMM_DEFAULT_UNROLL_M 2 258 #define DGEMM_DEFAULT_UNROLL_M 1 259 #define QGEMM_DEFAULT_UNROLL_M 2 260 #define CGEMM_DEFAULT_UNROLL_M 1 261 #define ZGEMM_DEFAULT_UNROLL_M 1 262 #define XGEMM_DEFAULT_UNROLL_M 
1 263 264 #define SGEMM_DEFAULT_R sgemm_r 265 #define DGEMM_DEFAULT_R dgemm_r 266 #define QGEMM_DEFAULT_R qgemm_r 267 #define CGEMM_DEFAULT_R cgemm_r 268 #define ZGEMM_DEFAULT_R zgemm_r 269 #define XGEMM_DEFAULT_R xgemm_r 270 271 #define SGEMM_DEFAULT_P 128 272 #define DGEMM_DEFAULT_P 128 273 #define QGEMM_DEFAULT_P 128 274 #define CGEMM_DEFAULT_P 128 275 #define ZGEMM_DEFAULT_P 128 276 #define XGEMM_DEFAULT_P 128 277 278 #define SGEMM_DEFAULT_Q 512 279 #define DGEMM_DEFAULT_Q 256 280 #define QGEMM_DEFAULT_Q 256 281 #define CGEMM_DEFAULT_Q 256 282 #define ZGEMM_DEFAULT_Q 128 283 #define XGEMM_DEFAULT_Q 128 284 285 #define SYMV_P 16 286 #endif 287 288 #ifdef NANO 289 290 #define SNUMOPT 4 291 #define DNUMOPT 2 292 293 #define GEMM_DEFAULT_OFFSET_A 64 294 #define GEMM_DEFAULT_OFFSET_B 256 295 #define GEMM_DEFAULT_ALIGN 0x01ffffUL 296 297 #ifdef ARCH_X86 298 #define SGEMM_DEFAULT_UNROLL_N 4 299 #define DGEMM_DEFAULT_UNROLL_N 4 300 #define QGEMM_DEFAULT_UNROLL_N 2 301 #define CGEMM_DEFAULT_UNROLL_N 2 302 #define ZGEMM_DEFAULT_UNROLL_N 2 303 #define XGEMM_DEFAULT_UNROLL_N 1 304 305 #define SGEMM_DEFAULT_UNROLL_M 4 306 #define DGEMM_DEFAULT_UNROLL_M 2 307 #define QGEMM_DEFAULT_UNROLL_M 2 308 #define CGEMM_DEFAULT_UNROLL_M 2 309 #define ZGEMM_DEFAULT_UNROLL_M 1 310 #define XGEMM_DEFAULT_UNROLL_M 1 311 #else 312 #define SGEMM_DEFAULT_UNROLL_N 8 313 #define DGEMM_DEFAULT_UNROLL_N 4 314 #define QGEMM_DEFAULT_UNROLL_N 2 315 #define CGEMM_DEFAULT_UNROLL_N 4 316 #define ZGEMM_DEFAULT_UNROLL_N 2 317 #define XGEMM_DEFAULT_UNROLL_N 1 318 319 #define SGEMM_DEFAULT_UNROLL_M 4 320 #define DGEMM_DEFAULT_UNROLL_M 4 321 #define QGEMM_DEFAULT_UNROLL_M 2 322 #define CGEMM_DEFAULT_UNROLL_M 2 323 #define ZGEMM_DEFAULT_UNROLL_M 2 324 #define XGEMM_DEFAULT_UNROLL_M 1 325 #endif 326 327 #define SGEMM_DEFAULT_P 288 328 #define DGEMM_DEFAULT_P 288 329 #define QGEMM_DEFAULT_P 288 330 #define CGEMM_DEFAULT_P 288 331 #define ZGEMM_DEFAULT_P 288 332 #define XGEMM_DEFAULT_P 288 333 334 #define 
SGEMM_DEFAULT_R sgemm_r 335 #define DGEMM_DEFAULT_R dgemm_r 336 #define QGEMM_DEFAULT_R qgemm_r 337 #define CGEMM_DEFAULT_R cgemm_r 338 #define ZGEMM_DEFAULT_R zgemm_r 339 #define XGEMM_DEFAULT_R xgemm_r 340 341 #define SGEMM_DEFAULT_Q 256 342 #define DGEMM_DEFAULT_Q 128 343 #define QGEMM_DEFAULT_Q 64 344 #define CGEMM_DEFAULT_Q 128 345 #define ZGEMM_DEFAULT_Q 64 346 #define XGEMM_DEFAULT_Q 32 347 348 #define SYMV_P 16 349 #define HAVE_EXCLUSIVE_CACHE 350 351 #endif 352 353 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3) 354 355 #ifdef HAVE_SSE 356 #define SNUMOPT 2 357 #else 358 #define SNUMOPT 1 359 #endif 360 #define DNUMOPT 1 361 362 #define GEMM_DEFAULT_OFFSET_A 0 363 #define GEMM_DEFAULT_OFFSET_B 0 364 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 365 366 #ifdef HAVE_SSE 367 #define SGEMM_DEFAULT_UNROLL_M 8 368 #define CGEMM_DEFAULT_UNROLL_M 4 369 #else 370 #define SGEMM_DEFAULT_UNROLL_M 4 371 #define CGEMM_DEFAULT_UNROLL_M 2 372 #endif 373 #define DGEMM_DEFAULT_UNROLL_M 2 374 #define SGEMM_DEFAULT_UNROLL_N 2 375 #define DGEMM_DEFAULT_UNROLL_N 2 376 #define QGEMM_DEFAULT_UNROLL_M 2 377 #define QGEMM_DEFAULT_UNROLL_N 2 378 #define CGEMM_DEFAULT_UNROLL_N 1 379 #define ZGEMM_DEFAULT_UNROLL_M 1 380 #define ZGEMM_DEFAULT_UNROLL_N 1 381 #define XGEMM_DEFAULT_UNROLL_M 1 382 #define XGEMM_DEFAULT_UNROLL_N 1 383 384 #define SGEMM_DEFAULT_P sgemm_p 385 #define SGEMM_DEFAULT_Q 256 386 #define SGEMM_DEFAULT_R sgemm_r 387 388 #define DGEMM_DEFAULT_P dgemm_p 389 #define DGEMM_DEFAULT_Q 256 390 #define DGEMM_DEFAULT_R dgemm_r 391 392 #define QGEMM_DEFAULT_P qgemm_p 393 #define QGEMM_DEFAULT_Q 256 394 #define QGEMM_DEFAULT_R qgemm_r 395 396 #define CGEMM_DEFAULT_P cgemm_p 397 #define CGEMM_DEFAULT_Q 256 398 #define CGEMM_DEFAULT_R cgemm_r 399 400 #define ZGEMM_DEFAULT_P zgemm_p 401 #define ZGEMM_DEFAULT_Q 256 402 #define ZGEMM_DEFAULT_R zgemm_r 403 404 #define XGEMM_DEFAULT_P xgemm_p 405 #define XGEMM_DEFAULT_Q 256 406 #define XGEMM_DEFAULT_R xgemm_r 407 408 #define 
SYMV_P 4 409 410 #endif 411 412 #ifdef PENTIUMM 413 414 #define SNUMOPT 2 415 #define DNUMOPT 1 416 417 #define GEMM_DEFAULT_OFFSET_A 0 418 #define GEMM_DEFAULT_OFFSET_B 0 419 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 420 421 #ifdef CORE_YONAH 422 #define SGEMM_DEFAULT_UNROLL_M 4 423 #define SGEMM_DEFAULT_UNROLL_N 4 424 #define DGEMM_DEFAULT_UNROLL_M 2 425 #define DGEMM_DEFAULT_UNROLL_N 4 426 #define QGEMM_DEFAULT_UNROLL_M 2 427 #define QGEMM_DEFAULT_UNROLL_N 2 428 #define CGEMM_DEFAULT_UNROLL_M 2 429 #define CGEMM_DEFAULT_UNROLL_N 2 430 #define ZGEMM_DEFAULT_UNROLL_M 1 431 #define ZGEMM_DEFAULT_UNROLL_N 2 432 #define XGEMM_DEFAULT_UNROLL_M 1 433 #define XGEMM_DEFAULT_UNROLL_N 1 434 #else 435 #define SGEMM_DEFAULT_UNROLL_M 8 436 #define SGEMM_DEFAULT_UNROLL_N 2 437 #define DGEMM_DEFAULT_UNROLL_M 2 438 #define DGEMM_DEFAULT_UNROLL_N 2 439 #define QGEMM_DEFAULT_UNROLL_M 2 440 #define QGEMM_DEFAULT_UNROLL_N 2 441 #define CGEMM_DEFAULT_UNROLL_M 4 442 #define CGEMM_DEFAULT_UNROLL_N 1 443 #define ZGEMM_DEFAULT_UNROLL_M 1 444 #define ZGEMM_DEFAULT_UNROLL_N 1 445 #define XGEMM_DEFAULT_UNROLL_M 1 446 #define XGEMM_DEFAULT_UNROLL_N 1 447 448 #endif 449 450 #define SGEMM_DEFAULT_P sgemm_p 451 #define SGEMM_DEFAULT_Q 256 452 #define SGEMM_DEFAULT_R sgemm_r 453 454 #define DGEMM_DEFAULT_P dgemm_p 455 #define DGEMM_DEFAULT_Q 256 456 #define DGEMM_DEFAULT_R dgemm_r 457 458 #define QGEMM_DEFAULT_P qgemm_p 459 #define QGEMM_DEFAULT_Q 256 460 #define QGEMM_DEFAULT_R qgemm_r 461 462 #define CGEMM_DEFAULT_P cgemm_p 463 #define CGEMM_DEFAULT_Q 256 464 #define CGEMM_DEFAULT_R cgemm_r 465 466 #define ZGEMM_DEFAULT_P zgemm_p 467 #define ZGEMM_DEFAULT_Q 256 468 #define ZGEMM_DEFAULT_R zgemm_r 469 470 #define XGEMM_DEFAULT_P xgemm_p 471 #define XGEMM_DEFAULT_Q 256 472 #define XGEMM_DEFAULT_R xgemm_r 473 474 #define SYMV_P 4 475 #endif 476 477 #ifdef CORE_NORTHWOOD 478 479 #define SNUMOPT 4 480 #define DNUMOPT 2 481 482 #define GEMM_DEFAULT_OFFSET_A 0 483 #define GEMM_DEFAULT_OFFSET_B 32 484 485 
#define GEMM_DEFAULT_ALIGN 0x0ffffUL 486 487 #define SYMV_P 8 488 489 #define SGEMM_DEFAULT_UNROLL_M 8 490 #define DGEMM_DEFAULT_UNROLL_M 4 491 #define QGEMM_DEFAULT_UNROLL_M 2 492 #define CGEMM_DEFAULT_UNROLL_M 4 493 #define ZGEMM_DEFAULT_UNROLL_M 2 494 #define XGEMM_DEFAULT_UNROLL_M 1 495 496 #define SGEMM_DEFAULT_UNROLL_N 2 497 #define DGEMM_DEFAULT_UNROLL_N 2 498 #define QGEMM_DEFAULT_UNROLL_N 2 499 #define CGEMM_DEFAULT_UNROLL_N 1 500 #define ZGEMM_DEFAULT_UNROLL_N 1 501 #define XGEMM_DEFAULT_UNROLL_N 1 502 503 #define SGEMM_DEFAULT_P sgemm_p 504 #define SGEMM_DEFAULT_R sgemm_r 505 506 #define DGEMM_DEFAULT_P dgemm_p 507 #define DGEMM_DEFAULT_R dgemm_r 508 509 #define QGEMM_DEFAULT_P qgemm_p 510 #define QGEMM_DEFAULT_R qgemm_r 511 512 #define CGEMM_DEFAULT_P cgemm_p 513 #define CGEMM_DEFAULT_R cgemm_r 514 515 #define ZGEMM_DEFAULT_P zgemm_p 516 #define ZGEMM_DEFAULT_R zgemm_r 517 518 #define XGEMM_DEFAULT_P xgemm_p 519 #define XGEMM_DEFAULT_R xgemm_r 520 521 #define SGEMM_DEFAULT_Q 128 522 #define DGEMM_DEFAULT_Q 128 523 #define QGEMM_DEFAULT_Q 128 524 #define CGEMM_DEFAULT_Q 128 525 #define ZGEMM_DEFAULT_Q 128 526 #define XGEMM_DEFAULT_Q 128 527 #endif 528 529 #ifdef CORE_PRESCOTT 530 531 #define SNUMOPT 4 532 #define DNUMOPT 2 533 534 #ifndef __64BIT__ 535 #define GEMM_DEFAULT_OFFSET_A 128 536 #define GEMM_DEFAULT_OFFSET_B 192 537 #else 538 #define GEMM_DEFAULT_OFFSET_A 0 539 #define GEMM_DEFAULT_OFFSET_B 256 540 #endif 541 542 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 543 544 #define SYMV_P 8 545 546 #ifdef ARCH_X86 547 #define SGEMM_DEFAULT_UNROLL_M 4 548 #define DGEMM_DEFAULT_UNROLL_M 2 549 #define QGEMM_DEFAULT_UNROLL_M 2 550 #define CGEMM_DEFAULT_UNROLL_M 2 551 #define ZGEMM_DEFAULT_UNROLL_M 1 552 #define XGEMM_DEFAULT_UNROLL_M 1 553 #else 554 #define SGEMM_DEFAULT_UNROLL_M 8 555 #define DGEMM_DEFAULT_UNROLL_M 4 556 #define QGEMM_DEFAULT_UNROLL_M 2 557 #define CGEMM_DEFAULT_UNROLL_M 4 558 #define ZGEMM_DEFAULT_UNROLL_M 2 559 #define XGEMM_DEFAULT_UNROLL_M 1 
560 #endif 561 562 #define SGEMM_DEFAULT_UNROLL_N 4 563 #define DGEMM_DEFAULT_UNROLL_N 4 564 #define QGEMM_DEFAULT_UNROLL_N 2 565 #define CGEMM_DEFAULT_UNROLL_N 2 566 #define ZGEMM_DEFAULT_UNROLL_N 2 567 #define XGEMM_DEFAULT_UNROLL_N 1 568 569 #define SGEMM_DEFAULT_P sgemm_p 570 #define SGEMM_DEFAULT_R sgemm_r 571 572 #define DGEMM_DEFAULT_P dgemm_p 573 #define DGEMM_DEFAULT_R dgemm_r 574 575 #define QGEMM_DEFAULT_P qgemm_p 576 #define QGEMM_DEFAULT_R qgemm_r 577 578 #define CGEMM_DEFAULT_P cgemm_p 579 #define CGEMM_DEFAULT_R cgemm_r 580 581 #define ZGEMM_DEFAULT_P zgemm_p 582 #define ZGEMM_DEFAULT_R zgemm_r 583 584 #define XGEMM_DEFAULT_P xgemm_p 585 #define XGEMM_DEFAULT_R xgemm_r 586 587 #define SGEMM_DEFAULT_Q 128 588 #define DGEMM_DEFAULT_Q 128 589 #define QGEMM_DEFAULT_Q 128 590 #define CGEMM_DEFAULT_Q 128 591 #define ZGEMM_DEFAULT_Q 128 592 #define XGEMM_DEFAULT_Q 128 593 #endif 594 595 #ifdef CORE2 596 597 #define SNUMOPT 8 598 #define DNUMOPT 4 599 600 #define GEMM_DEFAULT_OFFSET_A 448 601 #define GEMM_DEFAULT_OFFSET_B 128 602 #define GEMM_DEFAULT_ALIGN 0x03fffUL 603 604 #define SYMV_P 8 605 606 #define SWITCH_RATIO 4 607 608 #ifdef ARCH_X86 609 #define SGEMM_DEFAULT_UNROLL_M 8 610 #define DGEMM_DEFAULT_UNROLL_M 4 611 #define QGEMM_DEFAULT_UNROLL_M 2 612 #define CGEMM_DEFAULT_UNROLL_M 4 613 #define ZGEMM_DEFAULT_UNROLL_M 2 614 #define XGEMM_DEFAULT_UNROLL_M 1 615 616 #define SGEMM_DEFAULT_UNROLL_N 2 617 #define DGEMM_DEFAULT_UNROLL_N 2 618 #define QGEMM_DEFAULT_UNROLL_N 2 619 #define CGEMM_DEFAULT_UNROLL_N 1 620 #define ZGEMM_DEFAULT_UNROLL_N 1 621 #define XGEMM_DEFAULT_UNROLL_N 1 622 623 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b)) 624 625 #else 626 #define SGEMM_DEFAULT_UNROLL_M 8 627 #define DGEMM_DEFAULT_UNROLL_M 4 628 #define QGEMM_DEFAULT_UNROLL_M 2 629 #define CGEMM_DEFAULT_UNROLL_M 4 630 #define ZGEMM_DEFAULT_UNROLL_M 2 631 #define XGEMM_DEFAULT_UNROLL_M 1 632 633 #define SGEMM_DEFAULT_UNROLL_N 4 634 #define DGEMM_DEFAULT_UNROLL_N 4 635 
#define QGEMM_DEFAULT_UNROLL_N 2 636 #define CGEMM_DEFAULT_UNROLL_N 2 637 #define ZGEMM_DEFAULT_UNROLL_N 2 638 #define XGEMM_DEFAULT_UNROLL_N 1 639 #endif 640 641 #define SGEMM_DEFAULT_P sgemm_p 642 #define SGEMM_DEFAULT_R sgemm_r 643 644 #define DGEMM_DEFAULT_P dgemm_p 645 #define DGEMM_DEFAULT_R dgemm_r 646 647 #define QGEMM_DEFAULT_P qgemm_p 648 #define QGEMM_DEFAULT_R qgemm_r 649 650 #define CGEMM_DEFAULT_P cgemm_p 651 #define CGEMM_DEFAULT_R cgemm_r 652 653 #define ZGEMM_DEFAULT_P zgemm_p 654 #define ZGEMM_DEFAULT_R zgemm_r 655 656 #define XGEMM_DEFAULT_P xgemm_p 657 #define XGEMM_DEFAULT_R xgemm_r 658 659 #define SGEMM_DEFAULT_Q 256 660 #define DGEMM_DEFAULT_Q 256 661 #define QGEMM_DEFAULT_Q 256 662 #define CGEMM_DEFAULT_Q 256 663 #define ZGEMM_DEFAULT_Q 256 664 #define XGEMM_DEFAULT_Q 256 665 666 #endif 667 668 #ifdef PENRYN 669 670 #define SNUMOPT 8 671 #define DNUMOPT 4 672 673 #define GEMM_DEFAULT_OFFSET_A 128 674 #define GEMM_DEFAULT_OFFSET_B 0 675 #define GEMM_DEFAULT_ALIGN 0x03fffUL 676 677 #define SYMV_P 8 678 679 #define SWITCH_RATIO 4 680 681 #ifdef ARCH_X86 682 #define SGEMM_DEFAULT_UNROLL_M 4 683 #define DGEMM_DEFAULT_UNROLL_M 2 684 #define QGEMM_DEFAULT_UNROLL_M 2 685 #define CGEMM_DEFAULT_UNROLL_M 2 686 #define ZGEMM_DEFAULT_UNROLL_M 1 687 #define XGEMM_DEFAULT_UNROLL_M 1 688 689 #define SGEMM_DEFAULT_UNROLL_N 4 690 #define DGEMM_DEFAULT_UNROLL_N 4 691 #define QGEMM_DEFAULT_UNROLL_N 2 692 #define CGEMM_DEFAULT_UNROLL_N 2 693 #define ZGEMM_DEFAULT_UNROLL_N 2 694 #define XGEMM_DEFAULT_UNROLL_N 1 695 #else 696 #define SGEMM_DEFAULT_UNROLL_M 8 697 #define DGEMM_DEFAULT_UNROLL_M 4 698 #define QGEMM_DEFAULT_UNROLL_M 2 699 #define CGEMM_DEFAULT_UNROLL_M 4 700 #define ZGEMM_DEFAULT_UNROLL_M 2 701 #define XGEMM_DEFAULT_UNROLL_M 1 702 703 #define SGEMM_DEFAULT_UNROLL_N 4 704 #define DGEMM_DEFAULT_UNROLL_N 4 705 #define QGEMM_DEFAULT_UNROLL_N 2 706 #define CGEMM_DEFAULT_UNROLL_N 2 707 #define ZGEMM_DEFAULT_UNROLL_N 2 708 #define XGEMM_DEFAULT_UNROLL_N 1 
709 #endif 710 711 #define SGEMM_DEFAULT_P sgemm_p 712 #define SGEMM_DEFAULT_R sgemm_r 713 714 #define DGEMM_DEFAULT_P dgemm_p 715 #define DGEMM_DEFAULT_R dgemm_r 716 717 #define QGEMM_DEFAULT_P qgemm_p 718 #define QGEMM_DEFAULT_R qgemm_r 719 720 #define CGEMM_DEFAULT_P cgemm_p 721 #define CGEMM_DEFAULT_R cgemm_r 722 723 #define ZGEMM_DEFAULT_P zgemm_p 724 #define ZGEMM_DEFAULT_R zgemm_r 725 726 #define XGEMM_DEFAULT_P xgemm_p 727 #define XGEMM_DEFAULT_R xgemm_r 728 729 #define SGEMM_DEFAULT_Q 512 730 #define DGEMM_DEFAULT_Q 256 731 #define QGEMM_DEFAULT_Q 128 732 #define CGEMM_DEFAULT_Q 512 733 #define ZGEMM_DEFAULT_Q 256 734 #define XGEMM_DEFAULT_Q 128 735 736 #define GETRF_FACTOR 0.75 737 #endif 738 739 #ifdef DUNNINGTON 740 741 #define SNUMOPT 8 742 #define DNUMOPT 4 743 744 #define GEMM_DEFAULT_OFFSET_A 128 745 #define GEMM_DEFAULT_OFFSET_B 0 746 #define GEMM_DEFAULT_ALIGN 0x03fffUL 747 748 #define SYMV_P 8 749 750 #define SWITCH_RATIO 4 751 752 #ifdef ARCH_X86 753 #define SGEMM_DEFAULT_UNROLL_M 4 754 #define DGEMM_DEFAULT_UNROLL_M 2 755 #define QGEMM_DEFAULT_UNROLL_M 2 756 #define CGEMM_DEFAULT_UNROLL_M 2 757 #define ZGEMM_DEFAULT_UNROLL_M 1 758 #define XGEMM_DEFAULT_UNROLL_M 1 759 760 #define SGEMM_DEFAULT_UNROLL_N 4 761 #define DGEMM_DEFAULT_UNROLL_N 4 762 #define QGEMM_DEFAULT_UNROLL_N 2 763 #define CGEMM_DEFAULT_UNROLL_N 2 764 #define ZGEMM_DEFAULT_UNROLL_N 2 765 #define XGEMM_DEFAULT_UNROLL_N 1 766 #else 767 #define SGEMM_DEFAULT_UNROLL_M 8 768 #define DGEMM_DEFAULT_UNROLL_M 4 769 #define QGEMM_DEFAULT_UNROLL_M 2 770 #define CGEMM_DEFAULT_UNROLL_M 4 771 #define ZGEMM_DEFAULT_UNROLL_M 2 772 #define XGEMM_DEFAULT_UNROLL_M 1 773 774 #define SGEMM_DEFAULT_UNROLL_N 4 775 #define DGEMM_DEFAULT_UNROLL_N 4 776 #define QGEMM_DEFAULT_UNROLL_N 2 777 #define CGEMM_DEFAULT_UNROLL_N 2 778 #define ZGEMM_DEFAULT_UNROLL_N 2 779 #define XGEMM_DEFAULT_UNROLL_N 1 780 #endif 781 782 #define SGEMM_DEFAULT_P sgemm_p 783 #define SGEMM_DEFAULT_R sgemm_r 784 785 #define 
DGEMM_DEFAULT_P dgemm_p 786 #define DGEMM_DEFAULT_R dgemm_r 787 788 #define QGEMM_DEFAULT_P qgemm_p 789 #define QGEMM_DEFAULT_R qgemm_r 790 791 #define CGEMM_DEFAULT_P cgemm_p 792 #define CGEMM_DEFAULT_R cgemm_r 793 794 #define ZGEMM_DEFAULT_P zgemm_p 795 #define ZGEMM_DEFAULT_R zgemm_r 796 797 #define XGEMM_DEFAULT_P xgemm_p 798 #define XGEMM_DEFAULT_R xgemm_r 799 800 #define SGEMM_DEFAULT_Q 768 801 #define DGEMM_DEFAULT_Q 384 802 #define QGEMM_DEFAULT_Q 192 803 #define CGEMM_DEFAULT_Q 768 804 #define ZGEMM_DEFAULT_Q 384 805 #define XGEMM_DEFAULT_Q 192 806 807 #define GETRF_FACTOR 0.75 808 #define GEMM_THREAD gemm_thread_mn 809 #endif 810 811 #ifdef NEHALEM 812 813 #define SNUMOPT 8 814 #define DNUMOPT 4 815 816 #define GEMM_DEFAULT_OFFSET_A 32 817 #define GEMM_DEFAULT_OFFSET_B 0 818 #define GEMM_DEFAULT_ALIGN 0x03fffUL 819 820 #define SYMV_P 8 821 822 #define SWITCH_RATIO 4 823 824 #ifdef ARCH_X86 825 #define SGEMM_DEFAULT_UNROLL_M 4 826 #define DGEMM_DEFAULT_UNROLL_M 2 827 #define QGEMM_DEFAULT_UNROLL_M 2 828 #define CGEMM_DEFAULT_UNROLL_M 2 829 #define ZGEMM_DEFAULT_UNROLL_M 1 830 #define XGEMM_DEFAULT_UNROLL_M 1 831 832 #define SGEMM_DEFAULT_UNROLL_N 4 833 #define DGEMM_DEFAULT_UNROLL_N 4 834 #define QGEMM_DEFAULT_UNROLL_N 2 835 #define CGEMM_DEFAULT_UNROLL_N 2 836 #define ZGEMM_DEFAULT_UNROLL_N 2 837 #define XGEMM_DEFAULT_UNROLL_N 1 838 #else 839 #define SGEMM_DEFAULT_UNROLL_M 4 840 #define DGEMM_DEFAULT_UNROLL_M 2 841 #define QGEMM_DEFAULT_UNROLL_M 2 842 #define CGEMM_DEFAULT_UNROLL_M 2 843 #define ZGEMM_DEFAULT_UNROLL_M 1 844 #define XGEMM_DEFAULT_UNROLL_M 1 845 846 #define SGEMM_DEFAULT_UNROLL_N 8 847 #define DGEMM_DEFAULT_UNROLL_N 8 848 #define QGEMM_DEFAULT_UNROLL_N 2 849 #define CGEMM_DEFAULT_UNROLL_N 4 850 #define ZGEMM_DEFAULT_UNROLL_N 4 851 #define XGEMM_DEFAULT_UNROLL_N 1 852 #endif 853 854 #define SGEMM_DEFAULT_P 504 855 #define SGEMM_DEFAULT_R sgemm_r 856 857 #define DGEMM_DEFAULT_P 504 858 #define DGEMM_DEFAULT_R dgemm_r 859 860 #define 
QGEMM_DEFAULT_P 504 861 #define QGEMM_DEFAULT_R qgemm_r 862 863 #define CGEMM_DEFAULT_P 252 864 #define CGEMM_DEFAULT_R cgemm_r 865 866 #define ZGEMM_DEFAULT_P 252 867 #define ZGEMM_DEFAULT_R zgemm_r 868 869 #define XGEMM_DEFAULT_P 252 870 #define XGEMM_DEFAULT_R xgemm_r 871 872 #define SGEMM_DEFAULT_Q 512 873 #define DGEMM_DEFAULT_Q 256 874 #define QGEMM_DEFAULT_Q 128 875 #define CGEMM_DEFAULT_Q 512 876 #define ZGEMM_DEFAULT_Q 256 877 #define XGEMM_DEFAULT_Q 128 878 879 #define GETRF_FACTOR 0.72 880 881 #endif 882 883 884 #ifdef ATOM 885 886 #define SNUMOPT 2 887 #define DNUMOPT 1 888 889 #define GEMM_DEFAULT_OFFSET_A 64 890 #define GEMM_DEFAULT_OFFSET_B 0 891 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 892 893 #define SYMV_P 8 894 895 #ifdef ARCH_X86 896 #define SGEMM_DEFAULT_UNROLL_M 4 897 #define DGEMM_DEFAULT_UNROLL_M 2 898 #define QGEMM_DEFAULT_UNROLL_M 2 899 #define CGEMM_DEFAULT_UNROLL_M 2 900 #define ZGEMM_DEFAULT_UNROLL_M 1 901 #define XGEMM_DEFAULT_UNROLL_M 1 902 #else 903 #define SGEMM_DEFAULT_UNROLL_M 8 904 #define DGEMM_DEFAULT_UNROLL_M 4 905 #define QGEMM_DEFAULT_UNROLL_M 2 906 #define CGEMM_DEFAULT_UNROLL_M 4 907 #define ZGEMM_DEFAULT_UNROLL_M 2 908 #define XGEMM_DEFAULT_UNROLL_M 1 909 #endif 910 911 #define SGEMM_DEFAULT_UNROLL_N 4 912 #define DGEMM_DEFAULT_UNROLL_N 2 913 #define QGEMM_DEFAULT_UNROLL_N 2 914 #define CGEMM_DEFAULT_UNROLL_N 2 915 #define ZGEMM_DEFAULT_UNROLL_N 1 916 #define XGEMM_DEFAULT_UNROLL_N 1 917 918 #define SGEMM_DEFAULT_P sgemm_p 919 #define SGEMM_DEFAULT_R sgemm_r 920 921 #define DGEMM_DEFAULT_P dgemm_p 922 #define DGEMM_DEFAULT_R dgemm_r 923 924 #define QGEMM_DEFAULT_P qgemm_p 925 #define QGEMM_DEFAULT_R qgemm_r 926 927 #define CGEMM_DEFAULT_P cgemm_p 928 #define CGEMM_DEFAULT_R cgemm_r 929 930 #define ZGEMM_DEFAULT_P zgemm_p 931 #define ZGEMM_DEFAULT_R zgemm_r 932 933 #define XGEMM_DEFAULT_P xgemm_p 934 #define XGEMM_DEFAULT_R xgemm_r 935 936 #define SGEMM_DEFAULT_Q 256 937 #define DGEMM_DEFAULT_Q 256 938 #define QGEMM_DEFAULT_Q 
256 939 #define CGEMM_DEFAULT_Q 256 940 #define ZGEMM_DEFAULT_Q 256 941 #define XGEMM_DEFAULT_Q 256 942 943 #endif 944 945 946 #ifdef ITANIUM2 947 948 #define SNUMOPT 4 949 #define DNUMOPT 4 950 951 #define GEMM_DEFAULT_OFFSET_A 0 952 #define GEMM_DEFAULT_OFFSET_B 128 953 #define GEMM_DEFAULT_ALIGN 0x03fffUL 954 955 #define SGEMM_DEFAULT_UNROLL_M 8 956 #define SGEMM_DEFAULT_UNROLL_N 8 957 #define DGEMM_DEFAULT_UNROLL_M 8 958 #define DGEMM_DEFAULT_UNROLL_N 8 959 #define QGEMM_DEFAULT_UNROLL_M 8 960 #define QGEMM_DEFAULT_UNROLL_N 8 961 #define CGEMM_DEFAULT_UNROLL_M 4 962 #define CGEMM_DEFAULT_UNROLL_N 4 963 #define ZGEMM_DEFAULT_UNROLL_M 4 964 #define ZGEMM_DEFAULT_UNROLL_N 4 965 #define XGEMM_DEFAULT_UNROLL_M 4 966 #define XGEMM_DEFAULT_UNROLL_N 4 967 968 #define SGEMM_DEFAULT_P sgemm_p 969 #define DGEMM_DEFAULT_P dgemm_p 970 #define QGEMM_DEFAULT_P qgemm_p 971 #define CGEMM_DEFAULT_P cgemm_p 972 #define ZGEMM_DEFAULT_P zgemm_p 973 #define XGEMM_DEFAULT_P xgemm_p 974 975 #define SGEMM_DEFAULT_Q 1024 976 #define DGEMM_DEFAULT_Q 1024 977 #define QGEMM_DEFAULT_Q 1024 978 #define CGEMM_DEFAULT_Q 1024 979 #define ZGEMM_DEFAULT_Q 1024 980 #define XGEMM_DEFAULT_Q 1024 981 982 #define SGEMM_DEFAULT_R sgemm_r 983 #define DGEMM_DEFAULT_R dgemm_r 984 #define QGEMM_DEFAULT_R qgemm_r 985 #define CGEMM_DEFAULT_R cgemm_r 986 #define ZGEMM_DEFAULT_R zgemm_r 987 #define XGEMM_DEFAULT_R xgemm_r 988 989 #define SYMV_P 16 990 991 #define GETRF_FACTOR 0.65 992 993 #endif 994 995 #if defined(EV4) || defined(EV5) || defined(EV6) 996 997 #ifdef EV4 998 #define SNUMOPT 1 999 #define DNUMOPT 1 1000 #else 1001 #define SNUMOPT 2 1002 #define DNUMOPT 2 1003 #endif 1004 1005 #define GEMM_DEFAULT_OFFSET_A 512 1006 #define GEMM_DEFAULT_OFFSET_B 512 1007 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1008 1009 #define SGEMM_DEFAULT_UNROLL_M 4 1010 #define SGEMM_DEFAULT_UNROLL_N 4 1011 #define DGEMM_DEFAULT_UNROLL_M 4 1012 #define DGEMM_DEFAULT_UNROLL_N 4 1013 #define CGEMM_DEFAULT_UNROLL_M 2 1014 #define 
CGEMM_DEFAULT_UNROLL_N 2 1015 #define ZGEMM_DEFAULT_UNROLL_M 2 1016 #define ZGEMM_DEFAULT_UNROLL_N 2 1017 1018 #define SYMV_P 8 1019 1020 #ifdef EV4 1021 #define SGEMM_DEFAULT_P 32 1022 #define SGEMM_DEFAULT_Q 112 1023 #define SGEMM_DEFAULT_R 256 1024 1025 #define DGEMM_DEFAULT_P 32 1026 #define DGEMM_DEFAULT_Q 56 1027 #define DGEMM_DEFAULT_R 256 1028 1029 #define CGEMM_DEFAULT_P 32 1030 #define CGEMM_DEFAULT_Q 64 1031 #define CGEMM_DEFAULT_R 240 1032 1033 #define ZGEMM_DEFAULT_P 32 1034 #define ZGEMM_DEFAULT_Q 32 1035 #define ZGEMM_DEFAULT_R 240 1036 #endif 1037 1038 #ifdef EV5 1039 #define SGEMM_DEFAULT_P 64 1040 #define SGEMM_DEFAULT_Q 256 1041 1042 #define DGEMM_DEFAULT_P 64 1043 #define DGEMM_DEFAULT_Q 128 1044 1045 #define CGEMM_DEFAULT_P 64 1046 #define CGEMM_DEFAULT_Q 128 1047 1048 #define ZGEMM_DEFAULT_P 64 1049 #define ZGEMM_DEFAULT_Q 64 1050 #endif 1051 1052 #ifdef EV6 1053 #define SGEMM_DEFAULT_P 256 1054 #define SGEMM_DEFAULT_Q 512 1055 1056 #define DGEMM_DEFAULT_P 256 1057 #define DGEMM_DEFAULT_Q 256 1058 1059 #define CGEMM_DEFAULT_P 256 1060 #define CGEMM_DEFAULT_Q 256 1061 1062 #define ZGEMM_DEFAULT_P 128 1063 #define ZGEMM_DEFAULT_Q 256 1064 #endif 1065 1066 #endif 1067 1068 #ifdef CELL 1069 1070 #define SNUMOPT 2 1071 #define DNUMOPT 2 1072 1073 #define GEMM_DEFAULT_OFFSET_A 0 1074 #define GEMM_DEFAULT_OFFSET_B 8192 1075 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1076 1077 #define SGEMM_DEFAULT_UNROLL_M 16 1078 #define SGEMM_DEFAULT_UNROLL_N 4 1079 #define DGEMM_DEFAULT_UNROLL_M 4 1080 #define DGEMM_DEFAULT_UNROLL_N 4 1081 #define CGEMM_DEFAULT_UNROLL_M 8 1082 #define CGEMM_DEFAULT_UNROLL_N 2 1083 #define ZGEMM_DEFAULT_UNROLL_M 2 1084 #define ZGEMM_DEFAULT_UNROLL_N 2 1085 1086 #define SGEMM_DEFAULT_P 128 1087 #define DGEMM_DEFAULT_P 128 1088 #define CGEMM_DEFAULT_P 128 1089 #define ZGEMM_DEFAULT_P 128 1090 1091 #define SGEMM_DEFAULT_Q 512 1092 #define DGEMM_DEFAULT_Q 256 1093 #define CGEMM_DEFAULT_Q 256 1094 #define ZGEMM_DEFAULT_Q 128 1095 1096 #define 
SYMV_P 4 1097 #endif 1098 1099 #ifdef PPCG4 1100 #define GEMM_DEFAULT_OFFSET_A 0 1101 #define GEMM_DEFAULT_OFFSET_B 1024 1102 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1103 1104 #define SGEMM_DEFAULT_UNROLL_M 16 1105 #define SGEMM_DEFAULT_UNROLL_N 4 1106 #define DGEMM_DEFAULT_UNROLL_M 4 1107 #define DGEMM_DEFAULT_UNROLL_N 4 1108 #define CGEMM_DEFAULT_UNROLL_M 8 1109 #define CGEMM_DEFAULT_UNROLL_N 2 1110 #define ZGEMM_DEFAULT_UNROLL_M 2 1111 #define ZGEMM_DEFAULT_UNROLL_N 2 1112 1113 #define SGEMM_DEFAULT_P 256 1114 #define DGEMM_DEFAULT_P 128 1115 #define CGEMM_DEFAULT_P 128 1116 #define ZGEMM_DEFAULT_P 64 1117 1118 #define SGEMM_DEFAULT_Q 256 1119 #define DGEMM_DEFAULT_Q 256 1120 #define CGEMM_DEFAULT_Q 256 1121 #define ZGEMM_DEFAULT_Q 256 1122 1123 #define SYMV_P 4 1124 #endif 1125 1126 #ifdef PPC970 1127 1128 #define SNUMOPT 4 1129 #define DNUMOPT 4 1130 1131 #define GEMM_DEFAULT_OFFSET_A 2688 1132 #define GEMM_DEFAULT_OFFSET_B 3072 1133 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1134 1135 #define SGEMM_DEFAULT_UNROLL_M 16 1136 #define SGEMM_DEFAULT_UNROLL_N 4 1137 #define DGEMM_DEFAULT_UNROLL_M 4 1138 #define DGEMM_DEFAULT_UNROLL_N 4 1139 #define CGEMM_DEFAULT_UNROLL_M 8 1140 #define CGEMM_DEFAULT_UNROLL_N 2 1141 #define ZGEMM_DEFAULT_UNROLL_M 2 1142 #define ZGEMM_DEFAULT_UNROLL_N 2 1143 1144 #ifdef OS_LINUX 1145 #if L2_SIZE == 1024976 1146 #define SGEMM_DEFAULT_P 320 1147 #define DGEMM_DEFAULT_P 256 1148 #define CGEMM_DEFAULT_P 256 1149 #define ZGEMM_DEFAULT_P 256 1150 #else 1151 #define SGEMM_DEFAULT_P 176 1152 #define DGEMM_DEFAULT_P 176 1153 #define CGEMM_DEFAULT_P 176 1154 #define ZGEMM_DEFAULT_P 176 1155 #endif 1156 #endif 1157 1158 #define SGEMM_DEFAULT_Q 512 1159 #define DGEMM_DEFAULT_Q 256 1160 #define CGEMM_DEFAULT_Q 256 1161 #define ZGEMM_DEFAULT_Q 128 1162 1163 #define SYMV_P 4 1164 1165 #endif 1166 1167 #ifdef PPC440 1168 1169 #define SNUMOPT 2 1170 #define DNUMOPT 2 1171 1172 #define GEMM_DEFAULT_OFFSET_A (32 * 0) 1173 #define GEMM_DEFAULT_OFFSET_B (32 * 0) 
#define GEMM_DEFAULT_ALIGN      0x0ffffUL

#define SGEMM_DEFAULT_UNROLL_M  4
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  2

#define SGEMM_DEFAULT_P         512
#define DGEMM_DEFAULT_P         512
#define CGEMM_DEFAULT_P         512
#define ZGEMM_DEFAULT_P         512

#define SGEMM_DEFAULT_Q         1024
#define DGEMM_DEFAULT_Q         512
#define CGEMM_DEFAULT_Q         512
#define ZGEMM_DEFAULT_Q         256

#define SGEMM_DEFAULT_R         SGEMM_DEFAULT_P
#define DGEMM_DEFAULT_R         DGEMM_DEFAULT_P
#define CGEMM_DEFAULT_R         CGEMM_DEFAULT_P
#define ZGEMM_DEFAULT_R         ZGEMM_DEFAULT_P

#define SYMV_P                  4
#endif

/* ------------------------------------------------------------------ */
/* PPC440FP2                                                          */
/* ------------------------------------------------------------------ */
#ifdef PPC440FP2

#define SNUMOPT                 4
#define DNUMOPT                 4

#define GEMM_DEFAULT_OFFSET_A   (32 * 0)
#define GEMM_DEFAULT_OFFSET_B   (32 * 0)
#define GEMM_DEFAULT_ALIGN      0x0ffffUL

#define SGEMM_DEFAULT_UNROLL_M  8
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  4
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_N  2

#define SGEMM_DEFAULT_P         128
#define DGEMM_DEFAULT_P         128
#define CGEMM_DEFAULT_P         128
#define ZGEMM_DEFAULT_P         128

/* The first Q set is the active one; the #else branch keeps an      */
/* alternative, smaller set that is currently compiled out.          */
#if 1
#define SGEMM_DEFAULT_Q         4096
#define DGEMM_DEFAULT_Q         3072
#define CGEMM_DEFAULT_Q         2048
#define ZGEMM_DEFAULT_Q         1024
#else
#define SGEMM_DEFAULT_Q         512
#define DGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         128
#endif

#define SYMV_P                  4
#endif

/* ------------------------------------------------------------------ */
/* POWER3 / POWER4 / POWER5                                           */
/*                                                                    */
/* Offsets, alignment, unroll factors and SYMV_P are shared by the    */
/* three CPUs; P/Q/R are set per CPU in the nested conditionals.      */
/* NOTE(review): POWER3 sets no CGEMM_DEFAULT_P/Q/R and POWER4 sets   */
/* no *_DEFAULT_Q in this section - presumably they are provided      */
/* elsewhere; confirm before relying on them.                         */
/* ------------------------------------------------------------------ */
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   2048
#define GEMM_DEFAULT_ALIGN      0x0ffffUL

#define SGEMM_DEFAULT_UNROLL_M  4
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  2

#ifdef POWER3

#define SNUMOPT                 4
#define DNUMOPT                 4

#define SGEMM_DEFAULT_P         256
#define SGEMM_DEFAULT_Q         432
#define SGEMM_DEFAULT_R         1012

#define DGEMM_DEFAULT_P         256
#define DGEMM_DEFAULT_Q         216
#define DGEMM_DEFAULT_R         1012

#define ZGEMM_DEFAULT_P         256
#define ZGEMM_DEFAULT_Q         104
#define ZGEMM_DEFAULT_R         1012
#endif

/* POWER4: P depends on whether ALLOC_HUGETLB is defined. */
#if defined(POWER4)
#ifdef ALLOC_HUGETLB
#define SGEMM_DEFAULT_P         184
#define DGEMM_DEFAULT_P         184
#define CGEMM_DEFAULT_P         184
#define ZGEMM_DEFAULT_P         184
#else
#define SGEMM_DEFAULT_P         144
#define DGEMM_DEFAULT_P         144
#define CGEMM_DEFAULT_P         144
#define ZGEMM_DEFAULT_P         144
#endif
#endif

/* POWER5: P depends on ALLOC_HUGETLB; Q is 256 in both cases. */
#if defined(POWER5)
#ifdef ALLOC_HUGETLB
#define SGEMM_DEFAULT_P         512
#define DGEMM_DEFAULT_P         256
#define CGEMM_DEFAULT_P         256
#define ZGEMM_DEFAULT_P         128
#else
#define SGEMM_DEFAULT_P         320
#define DGEMM_DEFAULT_P         160
#define CGEMM_DEFAULT_P         160
#define ZGEMM_DEFAULT_P         80
#endif

#define SGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define DGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         256
#endif

#define SYMV_P                  8

#endif

/* ------------------------------------------------------------------ */
/* POWER6                                                             */
/* ------------------------------------------------------------------ */
#if defined(POWER6)

#define SNUMOPT                 4
#define DNUMOPT                 4

#define GEMM_DEFAULT_OFFSET_A   384
#define GEMM_DEFAULT_OFFSET_B   1024
#define GEMM_DEFAULT_ALIGN      0x03fffUL

#define SGEMM_DEFAULT_UNROLL_M  4
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_N  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  4

#define SGEMM_DEFAULT_P         992
#define DGEMM_DEFAULT_P         480
#define CGEMM_DEFAULT_P         488
#define ZGEMM_DEFAULT_P         248

#define SGEMM_DEFAULT_Q         504
#define DGEMM_DEFAULT_Q         504
#define CGEMM_DEFAULT_Q         400
#define ZGEMM_DEFAULT_Q         400

#define SYMV_P                  8

#endif

/* ------------------------------------------------------------------ */
/* SPARC V7                                                           */
/* The only section here that also sets GEMM_THREAD.                  */
/* ------------------------------------------------------------------ */
#if defined(SPARC) && defined(V7)

#define SNUMOPT                 4
#define DNUMOPT                 4

#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   2048
#define GEMM_DEFAULT_ALIGN      0x03fffUL

#define SGEMM_DEFAULT_UNROLL_M  2
#define SGEMM_DEFAULT_UNROLL_N  8
#define DGEMM_DEFAULT_UNROLL_M  2
#define DGEMM_DEFAULT_UNROLL_N  8
#define CGEMM_DEFAULT_UNROLL_M  1
#define CGEMM_DEFAULT_UNROLL_N  4
#define ZGEMM_DEFAULT_UNROLL_M  1
#define ZGEMM_DEFAULT_UNROLL_N  4

#define SGEMM_DEFAULT_P         256
#define DGEMM_DEFAULT_P         256
#define CGEMM_DEFAULT_P         256
#define ZGEMM_DEFAULT_P         256

#define SGEMM_DEFAULT_Q         512
#define DGEMM_DEFAULT_Q         256
#define CGEMM_DEFAULT_Q         256
#define ZGEMM_DEFAULT_Q         128

#define SYMV_P                  8
#define GEMM_THREAD             gemm_thread_mn
#endif

/* ------------------------------------------------------------------ */
/* SPARC V9                                                           */
/* ------------------------------------------------------------------ */
#if defined(SPARC) && defined(V9)

#define SNUMOPT                 2
#define DNUMOPT                 2

#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   2048
#define GEMM_DEFAULT_ALIGN      0x03fffUL

#define SGEMM_DEFAULT_UNROLL_M  4
#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define CGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_N  2

#define SGEMM_DEFAULT_P         512
#define DGEMM_DEFAULT_P         512
#define CGEMM_DEFAULT_P         512
#define ZGEMM_DEFAULT_P         512

#define SGEMM_DEFAULT_Q         1024
#define DGEMM_DEFAULT_Q         512
#define CGEMM_DEFAULT_Q         512
#define ZGEMM_DEFAULT_Q         256

#define SYMV_P                  8
#endif

/* ------------------------------------------------------------------ */
/* SICORTEX                                                           */
/* ------------------------------------------------------------------ */
#ifdef SICORTEX

#define SNUMOPT                 2
#define DNUMOPT                 2

#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   0
#define GEMM_DEFAULT_ALIGN      0x03fffUL

#define SGEMM_DEFAULT_UNROLL_M  2
#define SGEMM_DEFAULT_UNROLL_N  8
#define DGEMM_DEFAULT_UNROLL_M  2
#define DGEMM_DEFAULT_UNROLL_N  8
#define CGEMM_DEFAULT_UNROLL_M  1
#define CGEMM_DEFAULT_UNROLL_N  4
#define ZGEMM_DEFAULT_UNROLL_M  1
#define ZGEMM_DEFAULT_UNROLL_N  4

#define SGEMM_DEFAULT_P         108
#define DGEMM_DEFAULT_P         112
#define CGEMM_DEFAULT_P         108
#define ZGEMM_DEFAULT_P         112

#define SGEMM_DEFAULT_Q         288
#define DGEMM_DEFAULT_Q         144
#define CGEMM_DEFAULT_Q         144
#define ZGEMM_DEFAULT_Q         72

#define SGEMM_DEFAULT_R         2000
#define DGEMM_DEFAULT_R         2000
#define CGEMM_DEFAULT_R         2000
#define ZGEMM_DEFAULT_R         2000

#define SYMV_P                  16
#endif

/* ------------------------------------------------------------------ */
/* GENERIC                                                            */
/*                                                                    */
/* Same layout as the OPTERON section at the top of this file:        */
/* UNROLL_N is fixed, UNROLL_M depends on ARCH_X86, P and R refer to  */
/* the identifiers sgemm_p / sgemm_r (and friends), Q is a constant.  */
/*                                                                    */
/* The other target sections publish P/Q/R as *_DEFAULT_P/Q/R, while  */
/* this section originally defined only the bare *_P/Q/R spellings.   */
/* Both spellings are defined below, with identical values, so that   */
/* code written against either one resolves for a GENERIC build.      */
/* NOTE(review): confirm against the headers that consume these       */
/* macros whether the bare spellings are still needed.                */
/* ------------------------------------------------------------------ */
#ifdef GENERIC

#define SNUMOPT                 2
#define DNUMOPT                 2

#define GEMM_DEFAULT_OFFSET_A   0
#define GEMM_DEFAULT_OFFSET_B   0
#define GEMM_DEFAULT_ALIGN      0x0ffffUL

#define SGEMM_DEFAULT_UNROLL_N  4
#define DGEMM_DEFAULT_UNROLL_N  4
#define QGEMM_DEFAULT_UNROLL_N  2
#define CGEMM_DEFAULT_UNROLL_N  2
#define ZGEMM_DEFAULT_UNROLL_N  2
#define XGEMM_DEFAULT_UNROLL_N  1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M  4
#define DGEMM_DEFAULT_UNROLL_M  2
#define QGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  2
#define ZGEMM_DEFAULT_UNROLL_M  1
#define XGEMM_DEFAULT_UNROLL_M  1
#else
#define SGEMM_DEFAULT_UNROLL_M  8
#define DGEMM_DEFAULT_UNROLL_M  4
#define QGEMM_DEFAULT_UNROLL_M  2
#define CGEMM_DEFAULT_UNROLL_M  4
#define ZGEMM_DEFAULT_UNROLL_M  2
#define XGEMM_DEFAULT_UNROLL_M  1
#endif

/* *_DEFAULT_* spellings, matching the other target sections. */
#define SGEMM_DEFAULT_P         sgemm_p
#define DGEMM_DEFAULT_P         dgemm_p
#define QGEMM_DEFAULT_P         qgemm_p
#define CGEMM_DEFAULT_P         cgemm_p
#define ZGEMM_DEFAULT_P         zgemm_p
#define XGEMM_DEFAULT_P         xgemm_p

#define SGEMM_DEFAULT_R         sgemm_r
#define DGEMM_DEFAULT_R         dgemm_r
#define QGEMM_DEFAULT_R         qgemm_r
#define CGEMM_DEFAULT_R         cgemm_r
#define ZGEMM_DEFAULT_R         zgemm_r
#define XGEMM_DEFAULT_R         xgemm_r

#define SGEMM_DEFAULT_Q         128
#define DGEMM_DEFAULT_Q         128
#define QGEMM_DEFAULT_Q         128
#define CGEMM_DEFAULT_Q         128
#define ZGEMM_DEFAULT_Q         128
#define XGEMM_DEFAULT_Q         128

/* Original bare spellings, kept with unchanged replacement lists. */
#define SGEMM_P                 sgemm_p
#define DGEMM_P                 dgemm_p
#define QGEMM_P                 qgemm_p
#define CGEMM_P                 cgemm_p
#define ZGEMM_P                 zgemm_p
#define XGEMM_P                 xgemm_p

#define SGEMM_R                 sgemm_r
#define DGEMM_R                 dgemm_r
#define QGEMM_R                 qgemm_r
#define CGEMM_R                 cgemm_r
#define ZGEMM_R                 zgemm_r
#define XGEMM_R                 xgemm_r

#define SGEMM_Q                 128
#define DGEMM_Q                 128
#define QGEMM_Q                 128
#define CGEMM_Q                 128
#define ZGEMM_Q                 128
#define XGEMM_Q                 128

#define SYMV_P                  16

#endif

/* ------------------------------------------------------------------ */
/* Fallbacks: a target section that did not set the QGEMM / XGEMM     */
/* unroll factors gets 2 for each of them.                            */
/* ------------------------------------------------------------------ */
#ifndef QGEMM_DEFAULT_UNROLL_M
#define QGEMM_DEFAULT_UNROLL_M  2
#endif

#ifndef QGEMM_DEFAULT_UNROLL_N
#define QGEMM_DEFAULT_UNROLL_N  2
#endif

#ifndef XGEMM_DEFAULT_UNROLL_M
#define XGEMM_DEFAULT_UNROLL_M  2
#endif

#ifndef XGEMM_DEFAULT_UNROLL_N
#define XGEMM_DEFAULT_UNROLL_N  2
#endif

/* ------------------------------------------------------------------ */
/* Shuffle mnemonics.                                                 */
/*                                                                    */
/* When HAVE_SSE2 is not defined, SHUFPD_0..3 are spelled with        */
/* shufps; the immediates 0x44 / 0x4e / 0xe4 / 0xee select the same   */
/* 64-bit halves that shufpd $0..$3 would.  Otherwise, and for any    */
/* name still undefined, the plain shufpd / shufps form is used.      */
/* Each expansion ends in a comma, so the operands follow at the use  */
/* site.                                                              */
/* ------------------------------------------------------------------ */
#ifndef HAVE_SSE2
#define SHUFPD_0 shufps $0x44,
#define SHUFPD_1 shufps $0x4e,
#define SHUFPD_2 shufps $0xe4,
#define SHUFPD_3 shufps $0xee,
#endif

#ifndef SHUFPD_0
#define SHUFPD_0 shufpd $0,
#endif

#ifndef SHUFPD_1
#define SHUFPD_1 shufpd $1,
#endif

#ifndef SHUFPD_2
#define SHUFPD_2 shufpd $2,
#endif

#ifndef SHUFPD_3
#define SHUFPD_3 shufpd $3,
#endif

#ifndef SHUFPS_39
#define SHUFPS_39 shufps $0x39,
#endif


#endif /* PARAM_H */