1 /*********************************************************************/ 2 /* */ 3 /* Optimized BLAS libraries */ 4 /* By Kazushige Goto <kgoto@tacc.utexas.edu> */ 5 /* */ 6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */ 7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING */ 8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF */ 9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, */ 10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY */ 11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF */ 12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO */ 13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION. */ 14 /* Under no circumstances shall University be liable for incidental, */ 15 /* special, indirect, direct or consequential damages or loss of */ 16 /* profits, interruption of business, or related expenses which may */ 17 /* arise from use of Software or Documentation, including but not */ 18 /* limited to those resulting from defects in Software and/or */ 19 /* Documentation, or loss or inaccuracy of data of any kind. */ 20 /*********************************************************************/ 21 22 #include <stdio.h> 23 #include "common.h" 24 25 #undef TIMING 26 27 #define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \ 28 GEMM_BETA((M_TO) - (M_FROM), (N_TO - N_FROM), 0, \ 29 BETA[0], BETA[1], NULL, 0, NULL, 0, \ 30 (FLOAT *)(C) + (M_FROM) + (N_FROM) * (LDC) * COMPSIZE, LDC) 31 32 #ifndef RSIDE 33 #ifndef LOWER 34 #define ICOPYB_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_IUCOPYB(M, N, A, LDA, Y, X, BUFFER) 35 #define ICOPYR_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_IUCOPYR(M, N, A, LDA, Y, X, BUFFER) 36 #define ICOPYI_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_IUCOPYI(M, N, A, LDA, Y, X, BUFFER) 37 #else 38 #define ICOPYB_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_ILCOPYB(M, N, A, LDA, Y, X, BUFFER) 39 #define ICOPYR_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_ILCOPYR(M, N, A, LDA, Y, X, BUFFER) 40 #define ICOPYI_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_ILCOPYI(M, N, A, LDA, Y, X, BUFFER) 41 #endif 42 #endif 43 44 #ifdef RSIDE 45 #ifndef LOWER 46 #define OCOPYB_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \ 47 HEMM3M_OUCOPYB(M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER) 48 #define OCOPYR_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \ 49 HEMM3M_OUCOPYR(M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER) 50 #define OCOPYI_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \ 51 HEMM3M_OUCOPYI(M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER) 52 #else 53 #define OCOPYB_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \ 54 HEMM3M_OLCOPYB(M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER) 55 #define OCOPYR_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \ 56 HEMM3M_OLCOPYR(M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER) 57 #define OCOPYI_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \ 58 HEMM3M_OLCOPYI(M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER) 59 #endif 60 #endif 61 62 #ifndef RSIDE 63 #define K args -> m 64 #ifndef LOWER 65 #define GEMM3M_LOCAL HEMM3M_LU 66 #else 67 #define GEMM3M_LOCAL HEMM3M_LL 68 #endif 69 #else 70 #define K args -> n 71 #ifndef LOWER 72 #define GEMM3M_LOCAL HEMM3M_RU 73 #else 74 #define GEMM3M_LOCAL HEMM3M_RL 75 #endif 76 #endif 77 78 #ifdef THREADED_LEVEL3 79 #include "level3_gemm3m_thread.c" 80 #else 81 #include "gemm3m_level3.c" 82 #endif 83