1 /***************************************************************************** 2 Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are 7 met: 8 9 1. Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in 14 the documentation and/or other materials provided with the 15 distribution. 16 3. Neither the name of the ISCAS nor the names of its contributors may 17 be used to endorse or promote products derived from this software 18 without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 29 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 **********************************************************************************/ 32 33 /*********************************************************************/ 34 /* Copyright 2009, 2010 The University of Texas at Austin. */ 35 /* All rights reserved. 
*/ 36 /* */ 37 /* Redistribution and use in source and binary forms, with or */ 38 /* without modification, are permitted provided that the following */ 39 /* conditions are met: */ 40 /* */ 41 /* 1. Redistributions of source code must retain the above */ 42 /* copyright notice, this list of conditions and the following */ 43 /* disclaimer. */ 44 /* */ 45 /* 2. Redistributions in binary form must reproduce the above */ 46 /* copyright notice, this list of conditions and the following */ 47 /* disclaimer in the documentation and/or other materials */ 48 /* provided with the distribution. */ 49 /* */ 50 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ 51 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 52 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 53 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 54 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ 55 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ 56 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ 57 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ 58 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ 59 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ 60 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ 61 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ 62 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 63 /* POSSIBILITY OF SUCH DAMAGE. */ 64 /* */ 65 /* The views and conclusions contained in the software and */ 66 /* documentation are those of the authors and should not be */ 67 /* interpreted as representing official policies, either expressed */ 68 /* or implied, of The University of Texas at Austin. 
*/ 69 /*********************************************************************/ 70 71 #ifndef PARAM_H 72 #define PARAM_H 73 74 #ifdef OPTERON 75 76 #define SNUMOPT 4 77 #define DNUMOPT 2 78 79 #define GEMM_DEFAULT_OFFSET_A 64 80 #define GEMM_DEFAULT_OFFSET_B 256 81 #define GEMM_DEFAULT_ALIGN 0x01ffffUL 82 83 #define SGEMM_DEFAULT_UNROLL_N 4 84 #define DGEMM_DEFAULT_UNROLL_N 4 85 #define QGEMM_DEFAULT_UNROLL_N 2 86 #define CGEMM_DEFAULT_UNROLL_N 2 87 #define ZGEMM_DEFAULT_UNROLL_N 2 88 #define XGEMM_DEFAULT_UNROLL_N 1 89 90 #ifdef ARCH_X86 91 #define SGEMM_DEFAULT_UNROLL_M 4 92 #define DGEMM_DEFAULT_UNROLL_M 2 93 #define QGEMM_DEFAULT_UNROLL_M 2 94 #define CGEMM_DEFAULT_UNROLL_M 2 95 #define ZGEMM_DEFAULT_UNROLL_M 1 96 #define XGEMM_DEFAULT_UNROLL_M 1 97 #else 98 #define SGEMM_DEFAULT_UNROLL_M 8 99 #define DGEMM_DEFAULT_UNROLL_M 4 100 #define QGEMM_DEFAULT_UNROLL_M 2 101 #define CGEMM_DEFAULT_UNROLL_M 4 102 #define ZGEMM_DEFAULT_UNROLL_M 2 103 #define XGEMM_DEFAULT_UNROLL_M 1 104 #endif 105 106 #define SGEMM_DEFAULT_P sgemm_p 107 #define DGEMM_DEFAULT_P dgemm_p 108 #define QGEMM_DEFAULT_P qgemm_p 109 #define CGEMM_DEFAULT_P cgemm_p 110 #define ZGEMM_DEFAULT_P zgemm_p 111 #define XGEMM_DEFAULT_P xgemm_p 112 113 #define SGEMM_DEFAULT_R sgemm_r 114 #define DGEMM_DEFAULT_R dgemm_r 115 #define QGEMM_DEFAULT_R qgemm_r 116 #define CGEMM_DEFAULT_R cgemm_r 117 #define ZGEMM_DEFAULT_R zgemm_r 118 #define XGEMM_DEFAULT_R xgemm_r 119 120 #ifdef ALLOC_HUGETLB 121 122 #define SGEMM_DEFAULT_Q 248 123 #define DGEMM_DEFAULT_Q 248 124 #define QGEMM_DEFAULT_Q 248 125 #define CGEMM_DEFAULT_Q 248 126 #define ZGEMM_DEFAULT_Q 248 127 #define XGEMM_DEFAULT_Q 248 128 129 #else 130 131 #define SGEMM_DEFAULT_Q 240 132 #define DGEMM_DEFAULT_Q 240 133 #define QGEMM_DEFAULT_Q 240 134 #define CGEMM_DEFAULT_Q 240 135 #define ZGEMM_DEFAULT_Q 240 136 #define XGEMM_DEFAULT_Q 240 137 138 #endif 139 140 141 #define SYMV_P 16 142 #define HAVE_EXCLUSIVE_CACHE 143 144 #endif 145 146 #if defined(BARCELONA) 
|| defined(SHANGHAI) 147 148 #define SNUMOPT 8 149 #define DNUMOPT 4 150 151 #define GEMM_DEFAULT_OFFSET_A 64 152 #define GEMM_DEFAULT_OFFSET_B 832 153 #define GEMM_DEFAULT_ALIGN 0x0fffUL 154 155 #define SGEMM_DEFAULT_UNROLL_N 4 156 #define DGEMM_DEFAULT_UNROLL_N 4 157 #define QGEMM_DEFAULT_UNROLL_N 2 158 #define CGEMM_DEFAULT_UNROLL_N 2 159 #define ZGEMM_DEFAULT_UNROLL_N 2 160 #define XGEMM_DEFAULT_UNROLL_N 1 161 162 #ifdef ARCH_X86 163 #define SGEMM_DEFAULT_UNROLL_M 4 164 #define DGEMM_DEFAULT_UNROLL_M 2 165 #define QGEMM_DEFAULT_UNROLL_M 2 166 #define CGEMM_DEFAULT_UNROLL_M 2 167 #define ZGEMM_DEFAULT_UNROLL_M 1 168 #define XGEMM_DEFAULT_UNROLL_M 1 169 #else 170 #define SGEMM_DEFAULT_UNROLL_M 8 171 #define DGEMM_DEFAULT_UNROLL_M 4 172 #define QGEMM_DEFAULT_UNROLL_M 2 173 #define CGEMM_DEFAULT_UNROLL_M 4 174 #define ZGEMM_DEFAULT_UNROLL_M 2 175 #define XGEMM_DEFAULT_UNROLL_M 1 176 #endif 177 178 #if 0 179 #define SGEMM_DEFAULT_P 496 180 #define DGEMM_DEFAULT_P 248 181 #define QGEMM_DEFAULT_P 124 182 #define CGEMM_DEFAULT_P 248 183 #define ZGEMM_DEFAULT_P 124 184 #define XGEMM_DEFAULT_P 62 185 186 #define SGEMM_DEFAULT_Q 248 187 #define DGEMM_DEFAULT_Q 248 188 #define QGEMM_DEFAULT_Q 248 189 #define CGEMM_DEFAULT_Q 248 190 #define ZGEMM_DEFAULT_Q 248 191 #define XGEMM_DEFAULT_Q 248 192 193 #else 194 195 #define SGEMM_DEFAULT_P 448 196 #define DGEMM_DEFAULT_P 224 197 #define QGEMM_DEFAULT_P 112 198 #define CGEMM_DEFAULT_P 224 199 #define ZGEMM_DEFAULT_P 112 200 #define XGEMM_DEFAULT_P 56 201 202 #define SGEMM_DEFAULT_Q 224 203 #define DGEMM_DEFAULT_Q 224 204 #define QGEMM_DEFAULT_Q 224 205 #define CGEMM_DEFAULT_Q 224 206 #define ZGEMM_DEFAULT_Q 224 207 #define XGEMM_DEFAULT_Q 224 208 209 #endif 210 211 #define SGEMM_DEFAULT_R sgemm_r 212 #define QGEMM_DEFAULT_R qgemm_r 213 #define DGEMM_DEFAULT_R dgemm_r 214 #define CGEMM_DEFAULT_R cgemm_r 215 #define ZGEMM_DEFAULT_R zgemm_r 216 #define XGEMM_DEFAULT_R xgemm_r 217 218 #define SYMV_P 16 219 #define 
HAVE_EXCLUSIVE_CACHE 220 221 #define GEMM_THREAD gemm_thread_mn 222 223 #endif 224 225 #ifdef ATHLON 226 227 #define SNUMOPT 4 228 #define DNUMOPT 2 229 230 #define GEMM_DEFAULT_OFFSET_A 0 231 #define GEMM_DEFAULT_OFFSET_B 384 232 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 233 234 #define SGEMM_DEFAULT_UNROLL_N 4 235 #define DGEMM_DEFAULT_UNROLL_N 4 236 #define QGEMM_DEFAULT_UNROLL_N 2 237 #define CGEMM_DEFAULT_UNROLL_N 2 238 #define ZGEMM_DEFAULT_UNROLL_N 2 239 #define XGEMM_DEFAULT_UNROLL_N 1 240 241 #define SGEMM_DEFAULT_UNROLL_M 2 242 #define DGEMM_DEFAULT_UNROLL_M 1 243 #define QGEMM_DEFAULT_UNROLL_M 2 244 #define CGEMM_DEFAULT_UNROLL_M 1 245 #define ZGEMM_DEFAULT_UNROLL_M 1 246 #define XGEMM_DEFAULT_UNROLL_M 1 247 248 #define SGEMM_DEFAULT_R sgemm_r 249 #define DGEMM_DEFAULT_R dgemm_r 250 #define QGEMM_DEFAULT_R qgemm_r 251 #define CGEMM_DEFAULT_R cgemm_r 252 #define ZGEMM_DEFAULT_R zgemm_r 253 #define XGEMM_DEFAULT_R xgemm_r 254 255 #define SGEMM_DEFAULT_P 208 256 #define DGEMM_DEFAULT_P 104 257 #define QGEMM_DEFAULT_P 56 258 #define CGEMM_DEFAULT_P 104 259 #define ZGEMM_DEFAULT_P 56 260 #define XGEMM_DEFAULT_P 28 261 262 #define SGEMM_DEFAULT_Q 208 263 #define DGEMM_DEFAULT_Q 208 264 #define QGEMM_DEFAULT_Q 208 265 #define CGEMM_DEFAULT_Q 208 266 #define ZGEMM_DEFAULT_Q 208 267 #define XGEMM_DEFAULT_Q 208 268 269 #define SYMV_P 16 270 #define HAVE_EXCLUSIVE_CACHE 271 #endif 272 273 #ifdef VIAC3 274 275 #define SNUMOPT 2 276 #define DNUMOPT 1 277 278 #define GEMM_DEFAULT_OFFSET_A 0 279 #define GEMM_DEFAULT_OFFSET_B 256 280 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 281 282 #define SGEMM_DEFAULT_UNROLL_N 4 283 #define DGEMM_DEFAULT_UNROLL_N 4 284 #define QGEMM_DEFAULT_UNROLL_N 2 285 #define CGEMM_DEFAULT_UNROLL_N 2 286 #define ZGEMM_DEFAULT_UNROLL_N 2 287 #define XGEMM_DEFAULT_UNROLL_N 1 288 289 #define SGEMM_DEFAULT_UNROLL_M 2 290 #define DGEMM_DEFAULT_UNROLL_M 1 291 #define QGEMM_DEFAULT_UNROLL_M 2 292 #define CGEMM_DEFAULT_UNROLL_M 1 293 #define ZGEMM_DEFAULT_UNROLL_M 
1 294 #define XGEMM_DEFAULT_UNROLL_M 1 295 296 #define SGEMM_DEFAULT_R sgemm_r 297 #define DGEMM_DEFAULT_R dgemm_r 298 #define QGEMM_DEFAULT_R qgemm_r 299 #define CGEMM_DEFAULT_R cgemm_r 300 #define ZGEMM_DEFAULT_R zgemm_r 301 #define XGEMM_DEFAULT_R xgemm_r 302 303 #define SGEMM_DEFAULT_P 128 304 #define DGEMM_DEFAULT_P 128 305 #define QGEMM_DEFAULT_P 128 306 #define CGEMM_DEFAULT_P 128 307 #define ZGEMM_DEFAULT_P 128 308 #define XGEMM_DEFAULT_P 128 309 310 #define SGEMM_DEFAULT_Q 512 311 #define DGEMM_DEFAULT_Q 256 312 #define QGEMM_DEFAULT_Q 256 313 #define CGEMM_DEFAULT_Q 256 314 #define ZGEMM_DEFAULT_Q 128 315 #define XGEMM_DEFAULT_Q 128 316 317 #define SYMV_P 16 318 #endif 319 320 #ifdef NANO 321 322 #define SNUMOPT 4 323 #define DNUMOPT 2 324 325 #define GEMM_DEFAULT_OFFSET_A 64 326 #define GEMM_DEFAULT_OFFSET_B 256 327 #define GEMM_DEFAULT_ALIGN 0x01ffffUL 328 329 #ifdef ARCH_X86 330 #define SGEMM_DEFAULT_UNROLL_N 4 331 #define DGEMM_DEFAULT_UNROLL_N 4 332 #define QGEMM_DEFAULT_UNROLL_N 2 333 #define CGEMM_DEFAULT_UNROLL_N 2 334 #define ZGEMM_DEFAULT_UNROLL_N 2 335 #define XGEMM_DEFAULT_UNROLL_N 1 336 337 #define SGEMM_DEFAULT_UNROLL_M 4 338 #define DGEMM_DEFAULT_UNROLL_M 2 339 #define QGEMM_DEFAULT_UNROLL_M 2 340 #define CGEMM_DEFAULT_UNROLL_M 2 341 #define ZGEMM_DEFAULT_UNROLL_M 1 342 #define XGEMM_DEFAULT_UNROLL_M 1 343 #else 344 #define SGEMM_DEFAULT_UNROLL_N 8 345 #define DGEMM_DEFAULT_UNROLL_N 4 346 #define QGEMM_DEFAULT_UNROLL_N 2 347 #define CGEMM_DEFAULT_UNROLL_N 4 348 #define ZGEMM_DEFAULT_UNROLL_N 2 349 #define XGEMM_DEFAULT_UNROLL_N 1 350 351 #define SGEMM_DEFAULT_UNROLL_M 4 352 #define DGEMM_DEFAULT_UNROLL_M 4 353 #define QGEMM_DEFAULT_UNROLL_M 2 354 #define CGEMM_DEFAULT_UNROLL_M 2 355 #define ZGEMM_DEFAULT_UNROLL_M 2 356 #define XGEMM_DEFAULT_UNROLL_M 1 357 #endif 358 359 #define SGEMM_DEFAULT_P 288 360 #define DGEMM_DEFAULT_P 288 361 #define QGEMM_DEFAULT_P 288 362 #define CGEMM_DEFAULT_P 288 363 #define ZGEMM_DEFAULT_P 288 364 #define 
XGEMM_DEFAULT_P 288 365 366 #define SGEMM_DEFAULT_R sgemm_r 367 #define DGEMM_DEFAULT_R dgemm_r 368 #define QGEMM_DEFAULT_R qgemm_r 369 #define CGEMM_DEFAULT_R cgemm_r 370 #define ZGEMM_DEFAULT_R zgemm_r 371 #define XGEMM_DEFAULT_R xgemm_r 372 373 #define SGEMM_DEFAULT_Q 256 374 #define DGEMM_DEFAULT_Q 128 375 #define QGEMM_DEFAULT_Q 64 376 #define CGEMM_DEFAULT_Q 128 377 #define ZGEMM_DEFAULT_Q 64 378 #define XGEMM_DEFAULT_Q 32 379 380 #define SYMV_P 16 381 #define HAVE_EXCLUSIVE_CACHE 382 383 #endif 384 385 #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3) 386 387 #ifdef HAVE_SSE 388 #define SNUMOPT 2 389 #else 390 #define SNUMOPT 1 391 #endif 392 #define DNUMOPT 1 393 394 #define GEMM_DEFAULT_OFFSET_A 0 395 #define GEMM_DEFAULT_OFFSET_B 0 396 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 397 398 #ifdef HAVE_SSE 399 #define SGEMM_DEFAULT_UNROLL_M 8 400 #define CGEMM_DEFAULT_UNROLL_M 4 401 #else 402 #define SGEMM_DEFAULT_UNROLL_M 4 403 #define CGEMM_DEFAULT_UNROLL_M 2 404 #endif 405 #define DGEMM_DEFAULT_UNROLL_M 2 406 #define SGEMM_DEFAULT_UNROLL_N 2 407 #define DGEMM_DEFAULT_UNROLL_N 2 408 #define QGEMM_DEFAULT_UNROLL_M 2 409 #define QGEMM_DEFAULT_UNROLL_N 2 410 #define CGEMM_DEFAULT_UNROLL_N 1 411 #define ZGEMM_DEFAULT_UNROLL_M 1 412 #define ZGEMM_DEFAULT_UNROLL_N 1 413 #define XGEMM_DEFAULT_UNROLL_M 1 414 #define XGEMM_DEFAULT_UNROLL_N 1 415 416 #define SGEMM_DEFAULT_P sgemm_p 417 #define SGEMM_DEFAULT_Q 256 418 #define SGEMM_DEFAULT_R sgemm_r 419 420 #define DGEMM_DEFAULT_P dgemm_p 421 #define DGEMM_DEFAULT_Q 256 422 #define DGEMM_DEFAULT_R dgemm_r 423 424 #define QGEMM_DEFAULT_P qgemm_p 425 #define QGEMM_DEFAULT_Q 256 426 #define QGEMM_DEFAULT_R qgemm_r 427 428 #define CGEMM_DEFAULT_P cgemm_p 429 #define CGEMM_DEFAULT_Q 256 430 #define CGEMM_DEFAULT_R cgemm_r 431 432 #define ZGEMM_DEFAULT_P zgemm_p 433 #define ZGEMM_DEFAULT_Q 256 434 #define ZGEMM_DEFAULT_R zgemm_r 435 436 #define XGEMM_DEFAULT_P xgemm_p 437 #define XGEMM_DEFAULT_Q 256 438 #define 
XGEMM_DEFAULT_R xgemm_r 439 440 #define SYMV_P 4 441 442 #endif 443 444 #ifdef PENTIUMM 445 446 #define SNUMOPT 2 447 #define DNUMOPT 1 448 449 #define GEMM_DEFAULT_OFFSET_A 0 450 #define GEMM_DEFAULT_OFFSET_B 0 451 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 452 453 #ifdef CORE_YONAH 454 #define SGEMM_DEFAULT_UNROLL_M 4 455 #define SGEMM_DEFAULT_UNROLL_N 4 456 #define DGEMM_DEFAULT_UNROLL_M 2 457 #define DGEMM_DEFAULT_UNROLL_N 4 458 #define QGEMM_DEFAULT_UNROLL_M 2 459 #define QGEMM_DEFAULT_UNROLL_N 2 460 #define CGEMM_DEFAULT_UNROLL_M 2 461 #define CGEMM_DEFAULT_UNROLL_N 2 462 #define ZGEMM_DEFAULT_UNROLL_M 1 463 #define ZGEMM_DEFAULT_UNROLL_N 2 464 #define XGEMM_DEFAULT_UNROLL_M 1 465 #define XGEMM_DEFAULT_UNROLL_N 1 466 #else 467 #define SGEMM_DEFAULT_UNROLL_M 8 468 #define SGEMM_DEFAULT_UNROLL_N 2 469 #define DGEMM_DEFAULT_UNROLL_M 2 470 #define DGEMM_DEFAULT_UNROLL_N 2 471 #define QGEMM_DEFAULT_UNROLL_M 2 472 #define QGEMM_DEFAULT_UNROLL_N 2 473 #define CGEMM_DEFAULT_UNROLL_M 4 474 #define CGEMM_DEFAULT_UNROLL_N 1 475 #define ZGEMM_DEFAULT_UNROLL_M 1 476 #define ZGEMM_DEFAULT_UNROLL_N 1 477 #define XGEMM_DEFAULT_UNROLL_M 1 478 #define XGEMM_DEFAULT_UNROLL_N 1 479 480 #endif 481 482 #define SGEMM_DEFAULT_P sgemm_p 483 #define SGEMM_DEFAULT_Q 256 484 #define SGEMM_DEFAULT_R sgemm_r 485 486 #define DGEMM_DEFAULT_P dgemm_p 487 #define DGEMM_DEFAULT_Q 256 488 #define DGEMM_DEFAULT_R dgemm_r 489 490 #define QGEMM_DEFAULT_P qgemm_p 491 #define QGEMM_DEFAULT_Q 256 492 #define QGEMM_DEFAULT_R qgemm_r 493 494 #define CGEMM_DEFAULT_P cgemm_p 495 #define CGEMM_DEFAULT_Q 256 496 #define CGEMM_DEFAULT_R cgemm_r 497 498 #define ZGEMM_DEFAULT_P zgemm_p 499 #define ZGEMM_DEFAULT_Q 256 500 #define ZGEMM_DEFAULT_R zgemm_r 501 502 #define XGEMM_DEFAULT_P xgemm_p 503 #define XGEMM_DEFAULT_Q 256 504 #define XGEMM_DEFAULT_R xgemm_r 505 506 #define SYMV_P 4 507 #endif 508 509 #ifdef CORE_NORTHWOOD 510 511 #define SNUMOPT 4 512 #define DNUMOPT 2 513 514 #define GEMM_DEFAULT_OFFSET_A 0 515 
#define GEMM_DEFAULT_OFFSET_B 32 516 517 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 518 519 #define SYMV_P 8 520 521 #define SGEMM_DEFAULT_UNROLL_M 8 522 #define DGEMM_DEFAULT_UNROLL_M 4 523 #define QGEMM_DEFAULT_UNROLL_M 2 524 #define CGEMM_DEFAULT_UNROLL_M 4 525 #define ZGEMM_DEFAULT_UNROLL_M 2 526 #define XGEMM_DEFAULT_UNROLL_M 1 527 528 #define SGEMM_DEFAULT_UNROLL_N 2 529 #define DGEMM_DEFAULT_UNROLL_N 2 530 #define QGEMM_DEFAULT_UNROLL_N 2 531 #define CGEMM_DEFAULT_UNROLL_N 1 532 #define ZGEMM_DEFAULT_UNROLL_N 1 533 #define XGEMM_DEFAULT_UNROLL_N 1 534 535 #define SGEMM_DEFAULT_P sgemm_p 536 #define SGEMM_DEFAULT_R sgemm_r 537 538 #define DGEMM_DEFAULT_P dgemm_p 539 #define DGEMM_DEFAULT_R dgemm_r 540 541 #define QGEMM_DEFAULT_P qgemm_p 542 #define QGEMM_DEFAULT_R qgemm_r 543 544 #define CGEMM_DEFAULT_P cgemm_p 545 #define CGEMM_DEFAULT_R cgemm_r 546 547 #define ZGEMM_DEFAULT_P zgemm_p 548 #define ZGEMM_DEFAULT_R zgemm_r 549 550 #define XGEMM_DEFAULT_P xgemm_p 551 #define XGEMM_DEFAULT_R xgemm_r 552 553 #define SGEMM_DEFAULT_Q 128 554 #define DGEMM_DEFAULT_Q 128 555 #define QGEMM_DEFAULT_Q 128 556 #define CGEMM_DEFAULT_Q 128 557 #define ZGEMM_DEFAULT_Q 128 558 #define XGEMM_DEFAULT_Q 128 559 #endif 560 561 #ifdef CORE_PRESCOTT 562 563 #define SNUMOPT 4 564 #define DNUMOPT 2 565 566 #ifndef __64BIT__ 567 #define GEMM_DEFAULT_OFFSET_A 128 568 #define GEMM_DEFAULT_OFFSET_B 192 569 #else 570 #define GEMM_DEFAULT_OFFSET_A 0 571 #define GEMM_DEFAULT_OFFSET_B 256 572 #endif 573 574 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 575 576 #define SYMV_P 8 577 578 #ifdef ARCH_X86 579 #define SGEMM_DEFAULT_UNROLL_M 4 580 #define DGEMM_DEFAULT_UNROLL_M 2 581 #define QGEMM_DEFAULT_UNROLL_M 2 582 #define CGEMM_DEFAULT_UNROLL_M 2 583 #define ZGEMM_DEFAULT_UNROLL_M 1 584 #define XGEMM_DEFAULT_UNROLL_M 1 585 #else 586 #define SGEMM_DEFAULT_UNROLL_M 8 587 #define DGEMM_DEFAULT_UNROLL_M 4 588 #define QGEMM_DEFAULT_UNROLL_M 2 589 #define CGEMM_DEFAULT_UNROLL_M 4 590 #define 
ZGEMM_DEFAULT_UNROLL_M 2 591 #define XGEMM_DEFAULT_UNROLL_M 1 592 #endif 593 594 #define SGEMM_DEFAULT_UNROLL_N 4 595 #define DGEMM_DEFAULT_UNROLL_N 4 596 #define QGEMM_DEFAULT_UNROLL_N 2 597 #define CGEMM_DEFAULT_UNROLL_N 2 598 #define ZGEMM_DEFAULT_UNROLL_N 2 599 #define XGEMM_DEFAULT_UNROLL_N 1 600 601 #define SGEMM_DEFAULT_P sgemm_p 602 #define SGEMM_DEFAULT_R sgemm_r 603 604 #define DGEMM_DEFAULT_P dgemm_p 605 #define DGEMM_DEFAULT_R dgemm_r 606 607 #define QGEMM_DEFAULT_P qgemm_p 608 #define QGEMM_DEFAULT_R qgemm_r 609 610 #define CGEMM_DEFAULT_P cgemm_p 611 #define CGEMM_DEFAULT_R cgemm_r 612 613 #define ZGEMM_DEFAULT_P zgemm_p 614 #define ZGEMM_DEFAULT_R zgemm_r 615 616 #define XGEMM_DEFAULT_P xgemm_p 617 #define XGEMM_DEFAULT_R xgemm_r 618 619 #define SGEMM_DEFAULT_Q 128 620 #define DGEMM_DEFAULT_Q 128 621 #define QGEMM_DEFAULT_Q 128 622 #define CGEMM_DEFAULT_Q 128 623 #define ZGEMM_DEFAULT_Q 128 624 #define XGEMM_DEFAULT_Q 128 625 #endif 626 627 #ifdef CORE2 628 629 #define SNUMOPT 8 630 #define DNUMOPT 4 631 632 #define GEMM_DEFAULT_OFFSET_A 448 633 #define GEMM_DEFAULT_OFFSET_B 128 634 #define GEMM_DEFAULT_ALIGN 0x03fffUL 635 636 #define SYMV_P 8 637 638 #define SWITCH_RATIO 4 639 640 #ifdef ARCH_X86 641 #define SGEMM_DEFAULT_UNROLL_M 8 642 #define DGEMM_DEFAULT_UNROLL_M 4 643 #define QGEMM_DEFAULT_UNROLL_M 2 644 #define CGEMM_DEFAULT_UNROLL_M 4 645 #define ZGEMM_DEFAULT_UNROLL_M 2 646 #define XGEMM_DEFAULT_UNROLL_M 1 647 648 #define SGEMM_DEFAULT_UNROLL_N 2 649 #define DGEMM_DEFAULT_UNROLL_N 2 650 #define QGEMM_DEFAULT_UNROLL_N 2 651 #define CGEMM_DEFAULT_UNROLL_N 1 652 #define ZGEMM_DEFAULT_UNROLL_N 1 653 #define XGEMM_DEFAULT_UNROLL_N 1 654 655 #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b)) 656 657 #else 658 #define SGEMM_DEFAULT_UNROLL_M 8 659 #define DGEMM_DEFAULT_UNROLL_M 4 660 #define QGEMM_DEFAULT_UNROLL_M 2 661 #define CGEMM_DEFAULT_UNROLL_M 4 662 #define ZGEMM_DEFAULT_UNROLL_M 2 663 #define XGEMM_DEFAULT_UNROLL_M 1 664 665 #define 
SGEMM_DEFAULT_UNROLL_N 4 666 #define DGEMM_DEFAULT_UNROLL_N 4 667 #define QGEMM_DEFAULT_UNROLL_N 2 668 #define CGEMM_DEFAULT_UNROLL_N 2 669 #define ZGEMM_DEFAULT_UNROLL_N 2 670 #define XGEMM_DEFAULT_UNROLL_N 1 671 #endif 672 673 #define SGEMM_DEFAULT_P sgemm_p 674 #define SGEMM_DEFAULT_R sgemm_r 675 676 #define DGEMM_DEFAULT_P dgemm_p 677 #define DGEMM_DEFAULT_R dgemm_r 678 679 #define QGEMM_DEFAULT_P qgemm_p 680 #define QGEMM_DEFAULT_R qgemm_r 681 682 #define CGEMM_DEFAULT_P cgemm_p 683 #define CGEMM_DEFAULT_R cgemm_r 684 685 #define ZGEMM_DEFAULT_P zgemm_p 686 #define ZGEMM_DEFAULT_R zgemm_r 687 688 #define XGEMM_DEFAULT_P xgemm_p 689 #define XGEMM_DEFAULT_R xgemm_r 690 691 #define SGEMM_DEFAULT_Q 256 692 #define DGEMM_DEFAULT_Q 256 693 #define QGEMM_DEFAULT_Q 256 694 #define CGEMM_DEFAULT_Q 256 695 #define ZGEMM_DEFAULT_Q 256 696 #define XGEMM_DEFAULT_Q 256 697 698 #endif 699 700 #ifdef PENRYN 701 702 #define SNUMOPT 8 703 #define DNUMOPT 4 704 705 #define GEMM_DEFAULT_OFFSET_A 128 706 #define GEMM_DEFAULT_OFFSET_B 0 707 #define GEMM_DEFAULT_ALIGN 0x03fffUL 708 709 #define SYMV_P 8 710 711 #define SWITCH_RATIO 4 712 713 #ifdef ARCH_X86 714 #define SGEMM_DEFAULT_UNROLL_M 4 715 #define DGEMM_DEFAULT_UNROLL_M 2 716 #define QGEMM_DEFAULT_UNROLL_M 2 717 #define CGEMM_DEFAULT_UNROLL_M 2 718 #define ZGEMM_DEFAULT_UNROLL_M 1 719 #define XGEMM_DEFAULT_UNROLL_M 1 720 721 #define SGEMM_DEFAULT_UNROLL_N 4 722 #define DGEMM_DEFAULT_UNROLL_N 4 723 #define QGEMM_DEFAULT_UNROLL_N 2 724 #define CGEMM_DEFAULT_UNROLL_N 2 725 #define ZGEMM_DEFAULT_UNROLL_N 2 726 #define XGEMM_DEFAULT_UNROLL_N 1 727 #else 728 #define SGEMM_DEFAULT_UNROLL_M 8 729 #define DGEMM_DEFAULT_UNROLL_M 4 730 #define QGEMM_DEFAULT_UNROLL_M 2 731 #define CGEMM_DEFAULT_UNROLL_M 4 732 #define ZGEMM_DEFAULT_UNROLL_M 2 733 #define XGEMM_DEFAULT_UNROLL_M 1 734 735 #define SGEMM_DEFAULT_UNROLL_N 4 736 #define DGEMM_DEFAULT_UNROLL_N 4 737 #define QGEMM_DEFAULT_UNROLL_N 2 738 #define CGEMM_DEFAULT_UNROLL_N 2 739 
#define ZGEMM_DEFAULT_UNROLL_N 2 740 #define XGEMM_DEFAULT_UNROLL_N 1 741 #endif 742 743 #define SGEMM_DEFAULT_P sgemm_p 744 #define SGEMM_DEFAULT_R sgemm_r 745 746 #define DGEMM_DEFAULT_P dgemm_p 747 #define DGEMM_DEFAULT_R dgemm_r 748 749 #define QGEMM_DEFAULT_P qgemm_p 750 #define QGEMM_DEFAULT_R qgemm_r 751 752 #define CGEMM_DEFAULT_P cgemm_p 753 #define CGEMM_DEFAULT_R cgemm_r 754 755 #define ZGEMM_DEFAULT_P zgemm_p 756 #define ZGEMM_DEFAULT_R zgemm_r 757 758 #define XGEMM_DEFAULT_P xgemm_p 759 #define XGEMM_DEFAULT_R xgemm_r 760 761 #define SGEMM_DEFAULT_Q 512 762 #define DGEMM_DEFAULT_Q 256 763 #define QGEMM_DEFAULT_Q 128 764 #define CGEMM_DEFAULT_Q 512 765 #define ZGEMM_DEFAULT_Q 256 766 #define XGEMM_DEFAULT_Q 128 767 768 #define GETRF_FACTOR 0.75 769 #endif 770 771 #ifdef DUNNINGTON 772 773 #define SNUMOPT 8 774 #define DNUMOPT 4 775 776 #define GEMM_DEFAULT_OFFSET_A 128 777 #define GEMM_DEFAULT_OFFSET_B 0 778 #define GEMM_DEFAULT_ALIGN 0x03fffUL 779 780 #define SYMV_P 8 781 782 #define SWITCH_RATIO 4 783 784 #ifdef ARCH_X86 785 #define SGEMM_DEFAULT_UNROLL_M 4 786 #define DGEMM_DEFAULT_UNROLL_M 2 787 #define QGEMM_DEFAULT_UNROLL_M 2 788 #define CGEMM_DEFAULT_UNROLL_M 2 789 #define ZGEMM_DEFAULT_UNROLL_M 1 790 #define XGEMM_DEFAULT_UNROLL_M 1 791 792 #define SGEMM_DEFAULT_UNROLL_N 4 793 #define DGEMM_DEFAULT_UNROLL_N 4 794 #define QGEMM_DEFAULT_UNROLL_N 2 795 #define CGEMM_DEFAULT_UNROLL_N 2 796 #define ZGEMM_DEFAULT_UNROLL_N 2 797 #define XGEMM_DEFAULT_UNROLL_N 1 798 #else 799 #define SGEMM_DEFAULT_UNROLL_M 8 800 #define DGEMM_DEFAULT_UNROLL_M 4 801 #define QGEMM_DEFAULT_UNROLL_M 2 802 #define CGEMM_DEFAULT_UNROLL_M 4 803 #define ZGEMM_DEFAULT_UNROLL_M 2 804 #define XGEMM_DEFAULT_UNROLL_M 1 805 806 #define SGEMM_DEFAULT_UNROLL_N 4 807 #define DGEMM_DEFAULT_UNROLL_N 4 808 #define QGEMM_DEFAULT_UNROLL_N 2 809 #define CGEMM_DEFAULT_UNROLL_N 2 810 #define ZGEMM_DEFAULT_UNROLL_N 2 811 #define XGEMM_DEFAULT_UNROLL_N 1 812 #endif 813 814 #define SGEMM_DEFAULT_P 
sgemm_p 815 #define SGEMM_DEFAULT_R sgemm_r 816 817 #define DGEMM_DEFAULT_P dgemm_p 818 #define DGEMM_DEFAULT_R dgemm_r 819 820 #define QGEMM_DEFAULT_P qgemm_p 821 #define QGEMM_DEFAULT_R qgemm_r 822 823 #define CGEMM_DEFAULT_P cgemm_p 824 #define CGEMM_DEFAULT_R cgemm_r 825 826 #define ZGEMM_DEFAULT_P zgemm_p 827 #define ZGEMM_DEFAULT_R zgemm_r 828 829 #define XGEMM_DEFAULT_P xgemm_p 830 #define XGEMM_DEFAULT_R xgemm_r 831 832 #define SGEMM_DEFAULT_Q 768 833 #define DGEMM_DEFAULT_Q 384 834 #define QGEMM_DEFAULT_Q 192 835 #define CGEMM_DEFAULT_Q 768 836 #define ZGEMM_DEFAULT_Q 384 837 #define XGEMM_DEFAULT_Q 192 838 839 #define GETRF_FACTOR 0.75 840 #define GEMM_THREAD gemm_thread_mn 841 #endif 842 843 #ifdef NEHALEM 844 845 #define SNUMOPT 8 846 #define DNUMOPT 4 847 848 #define GEMM_DEFAULT_OFFSET_A 32 849 #define GEMM_DEFAULT_OFFSET_B 0 850 #define GEMM_DEFAULT_ALIGN 0x03fffUL 851 852 #define SYMV_P 8 853 854 #define SWITCH_RATIO 4 855 856 #ifdef ARCH_X86 857 #define SGEMM_DEFAULT_UNROLL_M 4 858 #define DGEMM_DEFAULT_UNROLL_M 2 859 #define QGEMM_DEFAULT_UNROLL_M 2 860 #define CGEMM_DEFAULT_UNROLL_M 2 861 #define ZGEMM_DEFAULT_UNROLL_M 1 862 #define XGEMM_DEFAULT_UNROLL_M 1 863 864 #define SGEMM_DEFAULT_UNROLL_N 4 865 #define DGEMM_DEFAULT_UNROLL_N 4 866 #define QGEMM_DEFAULT_UNROLL_N 2 867 #define CGEMM_DEFAULT_UNROLL_N 2 868 #define ZGEMM_DEFAULT_UNROLL_N 2 869 #define XGEMM_DEFAULT_UNROLL_N 1 870 #else 871 #define SGEMM_DEFAULT_UNROLL_M 4 872 #define DGEMM_DEFAULT_UNROLL_M 2 873 #define QGEMM_DEFAULT_UNROLL_M 2 874 #define CGEMM_DEFAULT_UNROLL_M 2 875 #define ZGEMM_DEFAULT_UNROLL_M 1 876 #define XGEMM_DEFAULT_UNROLL_M 1 877 878 #define SGEMM_DEFAULT_UNROLL_N 8 879 #define DGEMM_DEFAULT_UNROLL_N 8 880 #define QGEMM_DEFAULT_UNROLL_N 2 881 #define CGEMM_DEFAULT_UNROLL_N 4 882 #define ZGEMM_DEFAULT_UNROLL_N 4 883 #define XGEMM_DEFAULT_UNROLL_N 1 884 #endif 885 886 #define SGEMM_DEFAULT_P 504 887 #define SGEMM_DEFAULT_R sgemm_r 888 889 #define DGEMM_DEFAULT_P 504 
890 #define DGEMM_DEFAULT_R dgemm_r 891 892 #define QGEMM_DEFAULT_P 504 893 #define QGEMM_DEFAULT_R qgemm_r 894 895 #define CGEMM_DEFAULT_P 252 896 #define CGEMM_DEFAULT_R cgemm_r 897 898 #define ZGEMM_DEFAULT_P 252 899 #define ZGEMM_DEFAULT_R zgemm_r 900 901 #define XGEMM_DEFAULT_P 252 902 #define XGEMM_DEFAULT_R xgemm_r 903 904 #define SGEMM_DEFAULT_Q 512 905 #define DGEMM_DEFAULT_Q 256 906 #define QGEMM_DEFAULT_Q 128 907 #define CGEMM_DEFAULT_Q 512 908 #define ZGEMM_DEFAULT_Q 256 909 #define XGEMM_DEFAULT_Q 128 910 911 #define GETRF_FACTOR 0.72 912 913 #endif 914 915 916 #ifdef ATOM 917 918 #define SNUMOPT 2 919 #define DNUMOPT 1 920 921 #define GEMM_DEFAULT_OFFSET_A 64 922 #define GEMM_DEFAULT_OFFSET_B 0 923 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 924 925 #define SYMV_P 8 926 927 #ifdef ARCH_X86 928 #define SGEMM_DEFAULT_UNROLL_M 4 929 #define DGEMM_DEFAULT_UNROLL_M 2 930 #define QGEMM_DEFAULT_UNROLL_M 2 931 #define CGEMM_DEFAULT_UNROLL_M 2 932 #define ZGEMM_DEFAULT_UNROLL_M 1 933 #define XGEMM_DEFAULT_UNROLL_M 1 934 #else 935 #define SGEMM_DEFAULT_UNROLL_M 8 936 #define DGEMM_DEFAULT_UNROLL_M 4 937 #define QGEMM_DEFAULT_UNROLL_M 2 938 #define CGEMM_DEFAULT_UNROLL_M 4 939 #define ZGEMM_DEFAULT_UNROLL_M 2 940 #define XGEMM_DEFAULT_UNROLL_M 1 941 #endif 942 943 #define SGEMM_DEFAULT_UNROLL_N 4 944 #define DGEMM_DEFAULT_UNROLL_N 2 945 #define QGEMM_DEFAULT_UNROLL_N 2 946 #define CGEMM_DEFAULT_UNROLL_N 2 947 #define ZGEMM_DEFAULT_UNROLL_N 1 948 #define XGEMM_DEFAULT_UNROLL_N 1 949 950 #define SGEMM_DEFAULT_P sgemm_p 951 #define SGEMM_DEFAULT_R sgemm_r 952 953 #define DGEMM_DEFAULT_P dgemm_p 954 #define DGEMM_DEFAULT_R dgemm_r 955 956 #define QGEMM_DEFAULT_P qgemm_p 957 #define QGEMM_DEFAULT_R qgemm_r 958 959 #define CGEMM_DEFAULT_P cgemm_p 960 #define CGEMM_DEFAULT_R cgemm_r 961 962 #define ZGEMM_DEFAULT_P zgemm_p 963 #define ZGEMM_DEFAULT_R zgemm_r 964 965 #define XGEMM_DEFAULT_P xgemm_p 966 #define XGEMM_DEFAULT_R xgemm_r 967 968 #define SGEMM_DEFAULT_Q 256 969 
#define DGEMM_DEFAULT_Q 256 970 #define QGEMM_DEFAULT_Q 256 971 #define CGEMM_DEFAULT_Q 256 972 #define ZGEMM_DEFAULT_Q 256 973 #define XGEMM_DEFAULT_Q 256 974 975 #endif 976 977 978 #ifdef ITANIUM2 979 980 #define SNUMOPT 4 981 #define DNUMOPT 4 982 983 #define GEMM_DEFAULT_OFFSET_A 0 984 #define GEMM_DEFAULT_OFFSET_B 128 985 #define GEMM_DEFAULT_ALIGN 0x03fffUL 986 987 #define SGEMM_DEFAULT_UNROLL_M 8 988 #define SGEMM_DEFAULT_UNROLL_N 8 989 #define DGEMM_DEFAULT_UNROLL_M 8 990 #define DGEMM_DEFAULT_UNROLL_N 8 991 #define QGEMM_DEFAULT_UNROLL_M 8 992 #define QGEMM_DEFAULT_UNROLL_N 8 993 #define CGEMM_DEFAULT_UNROLL_M 4 994 #define CGEMM_DEFAULT_UNROLL_N 4 995 #define ZGEMM_DEFAULT_UNROLL_M 4 996 #define ZGEMM_DEFAULT_UNROLL_N 4 997 #define XGEMM_DEFAULT_UNROLL_M 4 998 #define XGEMM_DEFAULT_UNROLL_N 4 999 1000 #define SGEMM_DEFAULT_P sgemm_p 1001 #define DGEMM_DEFAULT_P dgemm_p 1002 #define QGEMM_DEFAULT_P qgemm_p 1003 #define CGEMM_DEFAULT_P cgemm_p 1004 #define ZGEMM_DEFAULT_P zgemm_p 1005 #define XGEMM_DEFAULT_P xgemm_p 1006 1007 #define SGEMM_DEFAULT_Q 1024 1008 #define DGEMM_DEFAULT_Q 1024 1009 #define QGEMM_DEFAULT_Q 1024 1010 #define CGEMM_DEFAULT_Q 1024 1011 #define ZGEMM_DEFAULT_Q 1024 1012 #define XGEMM_DEFAULT_Q 1024 1013 1014 #define SGEMM_DEFAULT_R sgemm_r 1015 #define DGEMM_DEFAULT_R dgemm_r 1016 #define QGEMM_DEFAULT_R qgemm_r 1017 #define CGEMM_DEFAULT_R cgemm_r 1018 #define ZGEMM_DEFAULT_R zgemm_r 1019 #define XGEMM_DEFAULT_R xgemm_r 1020 1021 #define SYMV_P 16 1022 1023 #define GETRF_FACTOR 0.65 1024 1025 #endif 1026 1027 #if defined(EV4) || defined(EV5) || defined(EV6) 1028 1029 #ifdef EV4 1030 #define SNUMOPT 1 1031 #define DNUMOPT 1 1032 #else 1033 #define SNUMOPT 2 1034 #define DNUMOPT 2 1035 #endif 1036 1037 #define GEMM_DEFAULT_OFFSET_A 512 1038 #define GEMM_DEFAULT_OFFSET_B 512 1039 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1040 1041 #define SGEMM_DEFAULT_UNROLL_M 4 1042 #define SGEMM_DEFAULT_UNROLL_N 4 1043 #define DGEMM_DEFAULT_UNROLL_M 4 
1044 #define DGEMM_DEFAULT_UNROLL_N 4 1045 #define CGEMM_DEFAULT_UNROLL_M 2 1046 #define CGEMM_DEFAULT_UNROLL_N 2 1047 #define ZGEMM_DEFAULT_UNROLL_M 2 1048 #define ZGEMM_DEFAULT_UNROLL_N 2 1049 1050 #define SYMV_P 8 1051 1052 #ifdef EV4 1053 #define SGEMM_DEFAULT_P 32 1054 #define SGEMM_DEFAULT_Q 112 1055 #define SGEMM_DEFAULT_R 256 1056 1057 #define DGEMM_DEFAULT_P 32 1058 #define DGEMM_DEFAULT_Q 56 1059 #define DGEMM_DEFAULT_R 256 1060 1061 #define CGEMM_DEFAULT_P 32 1062 #define CGEMM_DEFAULT_Q 64 1063 #define CGEMM_DEFAULT_R 240 1064 1065 #define ZGEMM_DEFAULT_P 32 1066 #define ZGEMM_DEFAULT_Q 32 1067 #define ZGEMM_DEFAULT_R 240 1068 #endif 1069 1070 #ifdef EV5 1071 #define SGEMM_DEFAULT_P 64 1072 #define SGEMM_DEFAULT_Q 256 1073 1074 #define DGEMM_DEFAULT_P 64 1075 #define DGEMM_DEFAULT_Q 128 1076 1077 #define CGEMM_DEFAULT_P 64 1078 #define CGEMM_DEFAULT_Q 128 1079 1080 #define ZGEMM_DEFAULT_P 64 1081 #define ZGEMM_DEFAULT_Q 64 1082 #endif 1083 1084 #ifdef EV6 1085 #define SGEMM_DEFAULT_P 256 1086 #define SGEMM_DEFAULT_Q 512 1087 1088 #define DGEMM_DEFAULT_P 256 1089 #define DGEMM_DEFAULT_Q 256 1090 1091 #define CGEMM_DEFAULT_P 256 1092 #define CGEMM_DEFAULT_Q 256 1093 1094 #define ZGEMM_DEFAULT_P 128 1095 #define ZGEMM_DEFAULT_Q 256 1096 #endif 1097 1098 #endif 1099 1100 #ifdef CELL 1101 1102 #define SNUMOPT 2 1103 #define DNUMOPT 2 1104 1105 #define GEMM_DEFAULT_OFFSET_A 0 1106 #define GEMM_DEFAULT_OFFSET_B 8192 1107 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1108 1109 #define SGEMM_DEFAULT_UNROLL_M 16 1110 #define SGEMM_DEFAULT_UNROLL_N 4 1111 #define DGEMM_DEFAULT_UNROLL_M 4 1112 #define DGEMM_DEFAULT_UNROLL_N 4 1113 #define CGEMM_DEFAULT_UNROLL_M 8 1114 #define CGEMM_DEFAULT_UNROLL_N 2 1115 #define ZGEMM_DEFAULT_UNROLL_M 2 1116 #define ZGEMM_DEFAULT_UNROLL_N 2 1117 1118 #define SGEMM_DEFAULT_P 128 1119 #define DGEMM_DEFAULT_P 128 1120 #define CGEMM_DEFAULT_P 128 1121 #define ZGEMM_DEFAULT_P 128 1122 1123 #define SGEMM_DEFAULT_Q 512 1124 #define 
DGEMM_DEFAULT_Q 256 1125 #define CGEMM_DEFAULT_Q 256 1126 #define ZGEMM_DEFAULT_Q 128 1127 1128 #define SYMV_P 4 1129 #endif 1130 1131 #ifdef PPCG4 1132 #define GEMM_DEFAULT_OFFSET_A 0 1133 #define GEMM_DEFAULT_OFFSET_B 1024 1134 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1135 1136 #define SGEMM_DEFAULT_UNROLL_M 16 1137 #define SGEMM_DEFAULT_UNROLL_N 4 1138 #define DGEMM_DEFAULT_UNROLL_M 4 1139 #define DGEMM_DEFAULT_UNROLL_N 4 1140 #define CGEMM_DEFAULT_UNROLL_M 8 1141 #define CGEMM_DEFAULT_UNROLL_N 2 1142 #define ZGEMM_DEFAULT_UNROLL_M 2 1143 #define ZGEMM_DEFAULT_UNROLL_N 2 1144 1145 #define SGEMM_DEFAULT_P 256 1146 #define DGEMM_DEFAULT_P 128 1147 #define CGEMM_DEFAULT_P 128 1148 #define ZGEMM_DEFAULT_P 64 1149 1150 #define SGEMM_DEFAULT_Q 256 1151 #define DGEMM_DEFAULT_Q 256 1152 #define CGEMM_DEFAULT_Q 256 1153 #define ZGEMM_DEFAULT_Q 256 1154 1155 #define SYMV_P 4 1156 #endif 1157 1158 #ifdef PPC970 1159 1160 #define SNUMOPT 4 1161 #define DNUMOPT 4 1162 1163 #define GEMM_DEFAULT_OFFSET_A 2688 1164 #define GEMM_DEFAULT_OFFSET_B 3072 1165 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1166 1167 #define SGEMM_DEFAULT_UNROLL_M 16 1168 #define SGEMM_DEFAULT_UNROLL_N 4 1169 #define DGEMM_DEFAULT_UNROLL_M 4 1170 #define DGEMM_DEFAULT_UNROLL_N 4 1171 #define CGEMM_DEFAULT_UNROLL_M 8 1172 #define CGEMM_DEFAULT_UNROLL_N 2 1173 #define ZGEMM_DEFAULT_UNROLL_M 2 1174 #define ZGEMM_DEFAULT_UNROLL_N 2 1175 1176 #ifdef OS_LINUX 1177 #if L2_SIZE == 1024976 1178 #define SGEMM_DEFAULT_P 320 1179 #define DGEMM_DEFAULT_P 256 1180 #define CGEMM_DEFAULT_P 256 1181 #define ZGEMM_DEFAULT_P 256 1182 #else 1183 #define SGEMM_DEFAULT_P 176 1184 #define DGEMM_DEFAULT_P 176 1185 #define CGEMM_DEFAULT_P 176 1186 #define ZGEMM_DEFAULT_P 176 1187 #endif 1188 #endif 1189 1190 #define SGEMM_DEFAULT_Q 512 1191 #define DGEMM_DEFAULT_Q 256 1192 #define CGEMM_DEFAULT_Q 256 1193 #define ZGEMM_DEFAULT_Q 128 1194 1195 #define SYMV_P 4 1196 1197 #endif 1198 1199 #ifdef PPC440 1200 1201 #define SNUMOPT 2 1202 #define 
DNUMOPT 2 1203 1204 #define GEMM_DEFAULT_OFFSET_A (32 * 0) 1205 #define GEMM_DEFAULT_OFFSET_B (32 * 0) 1206 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1207 1208 #define SGEMM_DEFAULT_UNROLL_M 4 1209 #define SGEMM_DEFAULT_UNROLL_N 4 1210 #define DGEMM_DEFAULT_UNROLL_M 4 1211 #define DGEMM_DEFAULT_UNROLL_N 4 1212 #define CGEMM_DEFAULT_UNROLL_M 2 1213 #define CGEMM_DEFAULT_UNROLL_N 2 1214 #define ZGEMM_DEFAULT_UNROLL_M 2 1215 #define ZGEMM_DEFAULT_UNROLL_N 2 1216 1217 #define SGEMM_DEFAULT_P 512 1218 #define DGEMM_DEFAULT_P 512 1219 #define CGEMM_DEFAULT_P 512 1220 #define ZGEMM_DEFAULT_P 512 1221 1222 #define SGEMM_DEFAULT_Q 1024 1223 #define DGEMM_DEFAULT_Q 512 1224 #define CGEMM_DEFAULT_Q 512 1225 #define ZGEMM_DEFAULT_Q 256 1226 1227 #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P 1228 #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P 1229 #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P 1230 #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P 1231 1232 #define SYMV_P 4 1233 #endif 1234 1235 #ifdef PPC440FP2 1236 1237 #define SNUMOPT 4 1238 #define DNUMOPT 4 1239 1240 #define GEMM_DEFAULT_OFFSET_A (32 * 0) 1241 #define GEMM_DEFAULT_OFFSET_B (32 * 0) 1242 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1243 1244 #define SGEMM_DEFAULT_UNROLL_M 8 1245 #define SGEMM_DEFAULT_UNROLL_N 4 1246 #define DGEMM_DEFAULT_UNROLL_M 8 1247 #define DGEMM_DEFAULT_UNROLL_N 4 1248 #define CGEMM_DEFAULT_UNROLL_M 4 1249 #define CGEMM_DEFAULT_UNROLL_N 2 1250 #define ZGEMM_DEFAULT_UNROLL_M 4 1251 #define ZGEMM_DEFAULT_UNROLL_N 2 1252 1253 #define SGEMM_DEFAULT_P 128 1254 #define DGEMM_DEFAULT_P 128 1255 #define CGEMM_DEFAULT_P 128 1256 #define ZGEMM_DEFAULT_P 128 1257 #if 1 1258 #define SGEMM_DEFAULT_Q 4096 1259 #define DGEMM_DEFAULT_Q 3072 1260 #define CGEMM_DEFAULT_Q 2048 1261 #define ZGEMM_DEFAULT_Q 1024 1262 #else 1263 #define SGEMM_DEFAULT_Q 512 1264 #define DGEMM_DEFAULT_Q 256 1265 #define CGEMM_DEFAULT_Q 256 1266 #define ZGEMM_DEFAULT_Q 128 1267 #endif 1268 1269 #define SYMV_P 4 1270 #endif 1271 1272 1273 1274 #if defined(POWER3) || 
defined(POWER4) || defined(POWER5) 1275 #define GEMM_DEFAULT_OFFSET_A 0 1276 #define GEMM_DEFAULT_OFFSET_B 2048 1277 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1278 1279 #define SGEMM_DEFAULT_UNROLL_M 4 1280 #define SGEMM_DEFAULT_UNROLL_N 4 1281 #define DGEMM_DEFAULT_UNROLL_M 4 1282 #define DGEMM_DEFAULT_UNROLL_N 4 1283 #define CGEMM_DEFAULT_UNROLL_M 2 1284 #define CGEMM_DEFAULT_UNROLL_N 2 1285 #define ZGEMM_DEFAULT_UNROLL_M 2 1286 #define ZGEMM_DEFAULT_UNROLL_N 2 1287 1288 #ifdef POWER3 1289 1290 #define SNUMOPT 4 1291 #define DNUMOPT 4 1292 1293 #define SGEMM_DEFAULT_P 256 1294 #define SGEMM_DEFAULT_Q 432 1295 #define SGEMM_DEFAULT_R 1012 1296 1297 #define DGEMM_DEFAULT_P 256 1298 #define DGEMM_DEFAULT_Q 216 1299 #define DGEMM_DEFAULT_R 1012 1300 1301 #define ZGEMM_DEFAULT_P 256 1302 #define ZGEMM_DEFAULT_Q 104 1303 #define ZGEMM_DEFAULT_R 1012 1304 #endif 1305 1306 #if defined(POWER4) 1307 #ifdef ALLOC_HUGETLB 1308 #define SGEMM_DEFAULT_P 184 1309 #define DGEMM_DEFAULT_P 184 1310 #define CGEMM_DEFAULT_P 184 1311 #define ZGEMM_DEFAULT_P 184 1312 #else 1313 #define SGEMM_DEFAULT_P 144 1314 #define DGEMM_DEFAULT_P 144 1315 #define CGEMM_DEFAULT_P 144 1316 #define ZGEMM_DEFAULT_P 144 1317 #endif 1318 #endif 1319 1320 #if defined(POWER5) 1321 #ifdef ALLOC_HUGETLB 1322 #define SGEMM_DEFAULT_P 512 1323 #define DGEMM_DEFAULT_P 256 1324 #define CGEMM_DEFAULT_P 256 1325 #define ZGEMM_DEFAULT_P 128 1326 #else 1327 #define SGEMM_DEFAULT_P 320 1328 #define DGEMM_DEFAULT_P 160 1329 #define CGEMM_DEFAULT_P 160 1330 #define ZGEMM_DEFAULT_P 80 1331 #endif 1332 1333 #define SGEMM_DEFAULT_Q 256 1334 #define CGEMM_DEFAULT_Q 256 1335 #define DGEMM_DEFAULT_Q 256 1336 #define ZGEMM_DEFAULT_Q 256 1337 #endif 1338 1339 #define SYMV_P 8 1340 1341 #endif 1342 1343 #if defined(POWER6) 1344 1345 #define SNUMOPT 4 1346 #define DNUMOPT 4 1347 1348 #define GEMM_DEFAULT_OFFSET_A 384 1349 #define GEMM_DEFAULT_OFFSET_B 1024 1350 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1351 1352 #define 
SGEMM_DEFAULT_UNROLL_M 4 1353 #define SGEMM_DEFAULT_UNROLL_N 4 1354 #define DGEMM_DEFAULT_UNROLL_M 4 1355 #define DGEMM_DEFAULT_UNROLL_N 4 1356 #define CGEMM_DEFAULT_UNROLL_M 2 1357 #define CGEMM_DEFAULT_UNROLL_N 4 1358 #define ZGEMM_DEFAULT_UNROLL_M 2 1359 #define ZGEMM_DEFAULT_UNROLL_N 4 1360 1361 #define SGEMM_DEFAULT_P 992 1362 #define DGEMM_DEFAULT_P 480 1363 #define CGEMM_DEFAULT_P 488 1364 #define ZGEMM_DEFAULT_P 248 1365 1366 #define SGEMM_DEFAULT_Q 504 1367 #define DGEMM_DEFAULT_Q 504 1368 #define CGEMM_DEFAULT_Q 400 1369 #define ZGEMM_DEFAULT_Q 400 1370 1371 #define SYMV_P 8 1372 1373 #endif 1374 1375 #if defined(SPARC) && defined(V7) 1376 1377 #define SNUMOPT 4 1378 #define DNUMOPT 4 1379 1380 #define GEMM_DEFAULT_OFFSET_A 0 1381 #define GEMM_DEFAULT_OFFSET_B 2048 1382 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1383 1384 #define SGEMM_DEFAULT_UNROLL_M 2 1385 #define SGEMM_DEFAULT_UNROLL_N 8 1386 #define DGEMM_DEFAULT_UNROLL_M 2 1387 #define DGEMM_DEFAULT_UNROLL_N 8 1388 #define CGEMM_DEFAULT_UNROLL_M 1 1389 #define CGEMM_DEFAULT_UNROLL_N 4 1390 #define ZGEMM_DEFAULT_UNROLL_M 1 1391 #define ZGEMM_DEFAULT_UNROLL_N 4 1392 1393 #define SGEMM_DEFAULT_P 256 1394 #define DGEMM_DEFAULT_P 256 1395 #define CGEMM_DEFAULT_P 256 1396 #define ZGEMM_DEFAULT_P 256 1397 1398 #define SGEMM_DEFAULT_Q 512 1399 #define DGEMM_DEFAULT_Q 256 1400 #define CGEMM_DEFAULT_Q 256 1401 #define ZGEMM_DEFAULT_Q 128 1402 1403 #define SYMV_P 8 1404 #define GEMM_THREAD gemm_thread_mn 1405 #endif 1406 1407 #if defined(SPARC) && defined(V9) 1408 1409 #define SNUMOPT 2 1410 #define DNUMOPT 2 1411 1412 #define GEMM_DEFAULT_OFFSET_A 0 1413 #define GEMM_DEFAULT_OFFSET_B 2048 1414 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1415 1416 #define SGEMM_DEFAULT_UNROLL_M 4 1417 #define SGEMM_DEFAULT_UNROLL_N 4 1418 #define DGEMM_DEFAULT_UNROLL_M 4 1419 #define DGEMM_DEFAULT_UNROLL_N 4 1420 #define CGEMM_DEFAULT_UNROLL_M 2 1421 #define CGEMM_DEFAULT_UNROLL_N 2 1422 #define ZGEMM_DEFAULT_UNROLL_M 2 1423 #define 
ZGEMM_DEFAULT_UNROLL_N 2 1424 1425 #define SGEMM_DEFAULT_P 512 1426 #define DGEMM_DEFAULT_P 512 1427 #define CGEMM_DEFAULT_P 512 1428 #define ZGEMM_DEFAULT_P 512 1429 1430 #define SGEMM_DEFAULT_Q 1024 1431 #define DGEMM_DEFAULT_Q 512 1432 #define CGEMM_DEFAULT_Q 512 1433 #define ZGEMM_DEFAULT_Q 256 1434 1435 #define SYMV_P 8 1436 #endif 1437 1438 #ifdef SICORTEX 1439 1440 #define SNUMOPT 2 1441 #define DNUMOPT 2 1442 1443 #define GEMM_DEFAULT_OFFSET_A 0 1444 #define GEMM_DEFAULT_OFFSET_B 0 1445 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1446 1447 #define SGEMM_DEFAULT_UNROLL_M 2 1448 #define SGEMM_DEFAULT_UNROLL_N 8 1449 #define DGEMM_DEFAULT_UNROLL_M 2 1450 #define DGEMM_DEFAULT_UNROLL_N 8 1451 #define CGEMM_DEFAULT_UNROLL_M 1 1452 #define CGEMM_DEFAULT_UNROLL_N 4 1453 #define ZGEMM_DEFAULT_UNROLL_M 1 1454 #define ZGEMM_DEFAULT_UNROLL_N 4 1455 1456 #define SGEMM_DEFAULT_P 108 1457 #define DGEMM_DEFAULT_P 112 1458 #define CGEMM_DEFAULT_P 108 1459 #define ZGEMM_DEFAULT_P 112 1460 1461 #define SGEMM_DEFAULT_Q 288 1462 #define DGEMM_DEFAULT_Q 144 1463 #define CGEMM_DEFAULT_Q 144 1464 #define ZGEMM_DEFAULT_Q 72 1465 1466 #define SGEMM_DEFAULT_R 2000 1467 #define DGEMM_DEFAULT_R 2000 1468 #define CGEMM_DEFAULT_R 2000 1469 #define ZGEMM_DEFAULT_R 2000 1470 1471 #define SYMV_P 16 1472 #endif 1473 1474 #ifdef LOONGSON3A 1475 ////Copy from SICORTEX 1476 #define SNUMOPT 2 1477 #define DNUMOPT 2 1478 1479 #define GEMM_DEFAULT_OFFSET_A 0 1480 #define GEMM_DEFAULT_OFFSET_B 0 1481 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1482 1483 #define SGEMM_DEFAULT_UNROLL_M 8 1484 #define SGEMM_DEFAULT_UNROLL_N 4 1485 1486 #define DGEMM_DEFAULT_UNROLL_M 4 1487 #define DGEMM_DEFAULT_UNROLL_N 4 1488 1489 #define CGEMM_DEFAULT_UNROLL_M 4 1490 #define CGEMM_DEFAULT_UNROLL_N 2 1491 1492 #define ZGEMM_DEFAULT_UNROLL_M 2 1493 #define ZGEMM_DEFAULT_UNROLL_N 2 1494 1495 #define SGEMM_DEFAULT_P 64 1496 #define DGEMM_DEFAULT_P 44 1497 #define CGEMM_DEFAULT_P 64 1498 #define ZGEMM_DEFAULT_P 32 1499 1500 #define 
SGEMM_DEFAULT_Q 192 1501 #define DGEMM_DEFAULT_Q 92 1502 #define CGEMM_DEFAULT_Q 128 1503 #define ZGEMM_DEFAULT_Q 80 1504 1505 #define SGEMM_DEFAULT_R 640 1506 #define DGEMM_DEFAULT_R dgemm_r 1507 #define CGEMM_DEFAULT_R 640 1508 #define ZGEMM_DEFAULT_R 640 1509 1510 #define GEMM_OFFSET_A1 0x10000 1511 #define GEMM_OFFSET_B1 0x100000 1512 1513 #define SYMV_P 16 1514 #endif 1515 1516 #ifdef LOONGSON3B 1517 #define SNUMOPT 2 1518 #define DNUMOPT 2 1519 1520 #define GEMM_DEFAULT_OFFSET_A 0 1521 #define GEMM_DEFAULT_OFFSET_B 0 1522 #define GEMM_DEFAULT_ALIGN 0x03fffUL 1523 1524 #define SGEMM_DEFAULT_UNROLL_M 2 1525 #define SGEMM_DEFAULT_UNROLL_N 2 1526 1527 #define DGEMM_DEFAULT_UNROLL_M 2 1528 #define DGEMM_DEFAULT_UNROLL_N 2 1529 1530 #define CGEMM_DEFAULT_UNROLL_M 2 1531 #define CGEMM_DEFAULT_UNROLL_N 2 1532 1533 #define ZGEMM_DEFAULT_UNROLL_M 2 1534 #define ZGEMM_DEFAULT_UNROLL_N 2 1535 1536 #define SGEMM_DEFAULT_P 64 1537 #define DGEMM_DEFAULT_P 24 1538 #define CGEMM_DEFAULT_P 24 1539 #define ZGEMM_DEFAULT_P 20 1540 1541 #define SGEMM_DEFAULT_Q 192 1542 #define DGEMM_DEFAULT_Q 128 1543 #define CGEMM_DEFAULT_Q 128 1544 #define ZGEMM_DEFAULT_Q 64 1545 1546 #define SGEMM_DEFAULT_R 512 1547 #define DGEMM_DEFAULT_R 512 1548 #define CGEMM_DEFAULT_R 512 1549 #define ZGEMM_DEFAULT_R 512 1550 1551 #define GEMM_OFFSET_A1 0x10000 1552 #define GEMM_OFFSET_B1 0x100000 1553 1554 #define SYMV_P 16 1555 #endif 1556 1557 #ifdef GENERIC 1558 1559 #define SNUMOPT 2 1560 #define DNUMOPT 2 1561 1562 #define GEMM_DEFAULT_OFFSET_A 0 1563 #define GEMM_DEFAULT_OFFSET_B 0 1564 #define GEMM_DEFAULT_ALIGN 0x0ffffUL 1565 1566 #define SGEMM_DEFAULT_UNROLL_N 4 1567 #define DGEMM_DEFAULT_UNROLL_N 4 1568 #define QGEMM_DEFAULT_UNROLL_N 2 1569 #define CGEMM_DEFAULT_UNROLL_N 2 1570 #define ZGEMM_DEFAULT_UNROLL_N 2 1571 #define XGEMM_DEFAULT_UNROLL_N 1 1572 1573 #ifdef ARCH_X86 1574 #define SGEMM_DEFAULT_UNROLL_M 4 1575 #define DGEMM_DEFAULT_UNROLL_M 2 1576 #define QGEMM_DEFAULT_UNROLL_M 2 1577 
#define CGEMM_DEFAULT_UNROLL_M 2 1578 #define ZGEMM_DEFAULT_UNROLL_M 1 1579 #define XGEMM_DEFAULT_UNROLL_M 1 1580 #else 1581 #define SGEMM_DEFAULT_UNROLL_M 8 1582 #define DGEMM_DEFAULT_UNROLL_M 4 1583 #define QGEMM_DEFAULT_UNROLL_M 2 1584 #define CGEMM_DEFAULT_UNROLL_M 4 1585 #define ZGEMM_DEFAULT_UNROLL_M 2 1586 #define XGEMM_DEFAULT_UNROLL_M 1 1587 #endif 1588 1589 #define SGEMM_P sgemm_p 1590 #define DGEMM_P dgemm_p 1591 #define QGEMM_P qgemm_p 1592 #define CGEMM_P cgemm_p 1593 #define ZGEMM_P zgemm_p 1594 #define XGEMM_P xgemm_p 1595 1596 #define SGEMM_R sgemm_r 1597 #define DGEMM_R dgemm_r 1598 #define QGEMM_R qgemm_r 1599 #define CGEMM_R cgemm_r 1600 #define ZGEMM_R zgemm_r 1601 #define XGEMM_R xgemm_r 1602 1603 #define SGEMM_Q 128 1604 #define DGEMM_Q 128 1605 #define QGEMM_Q 128 1606 #define CGEMM_Q 128 1607 #define ZGEMM_Q 128 1608 #define XGEMM_Q 128 1609 1610 #define SYMV_P 16 1611 1612 #endif 1613 1614 #ifndef QGEMM_DEFAULT_UNROLL_M 1615 #define QGEMM_DEFAULT_UNROLL_M 2 1616 #endif 1617 1618 #ifndef QGEMM_DEFAULT_UNROLL_N 1619 #define QGEMM_DEFAULT_UNROLL_N 2 1620 #endif 1621 1622 #ifndef XGEMM_DEFAULT_UNROLL_M 1623 #define XGEMM_DEFAULT_UNROLL_M 2 1624 #endif 1625 1626 #ifndef XGEMM_DEFAULT_UNROLL_N 1627 #define XGEMM_DEFAULT_UNROLL_N 2 1628 #endif 1629 1630 #ifndef HAVE_SSE2 1631 #define SHUFPD_0 shufps $0x44, 1632 #define SHUFPD_1 shufps $0x4e, 1633 #define SHUFPD_2 shufps $0xe4, 1634 #define SHUFPD_3 shufps $0xee, 1635 #endif 1636 1637 #ifndef SHUFPD_0 1638 #define SHUFPD_0 shufpd $0, 1639 #endif 1640 1641 #ifndef SHUFPD_1 1642 #define SHUFPD_1 shufpd $1, 1643 #endif 1644 1645 #ifndef SHUFPD_2 1646 #define SHUFPD_2 shufpd $2, 1647 #endif 1648 1649 #ifndef SHUFPD_3 1650 #define SHUFPD_3 shufpd $3, 1651 #endif 1652 1653 #ifndef SHUFPS_39 1654 #define SHUFPS_39 shufps $0x39, 1655 #endif 1656 1657 1658 #endif 1659