/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2009. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/
21 
22 #include <stdio.h>
23 #include "common.h"
24 
/*
 * Make sure TIMING is not defined for anything that follows in this
 * translation unit.
 * NOTE(review): presumably this switches off timing instrumentation in the
 * driver source included at the end of this file -- confirm there.
 */
#undef TIMING
26 
/*
 * BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC)
 *
 * Expands to a GEMM_BETA call on the sub-block of C that starts at row
 * M_FROM / column N_FROM and spans (M_TO - M_FROM) rows by
 * (N_TO - N_FROM) columns.
 *
 *   - BETA must be indexable; BETA[0] and BETA[1] are forwarded as two
 *     separate arguments.
 *   - The third argument is 0 and both pointer/stride pairs that precede
 *     the C pointer are passed as NULL / 0.
 *   - The C pointer is advanced, in FLOAT units, by
 *     (M_FROM + N_FROM * LDC) * COMPSIZE.  The row offset is scaled by
 *     COMPSIZE exactly like the column offset so that a non-zero M_FROM
 *     lands on the start of an element instead of in the middle of one
 *     (for M_FROM == 0 the address is unchanged).
 *
 * Every macro parameter is parenthesised so that expression arguments
 * such as `n0 + 1` or `beta + 2` expand with the intended precedence.
 */
#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \
	GEMM_BETA((M_TO) - (M_FROM), (N_TO) - (N_FROM), 0, \
		  (BETA)[0], (BETA)[1], NULL, 0, NULL, 0, \
		  (FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, (LDC))
31 
/*
 * ICOPY{B,R,I}_OPERATION -- defined only when RSIDE is NOT defined.
 *
 * Each macro forwards to the matching HEMM3M_I?COPY{B,R,I} routine:
 * the HEMM3M_IU* family when LOWER is not defined, the HEMM3M_IL* family
 * when it is.
 *
 * Argument order matters: the macro takes (..., X, Y, BUFFER) but hands
 * the two position arguments to the routine swapped, as (..., Y, X, BUFFER).
 *
 * NOTE(review): the B / R / I suffixes presumably select the "both",
 * "real" and "imaginary" packing variants used by the 3M scheme -- confirm
 * against the HEMM3M_* kernel definitions.
 */
#ifndef RSIDE
#ifndef LOWER
#define ICOPYB_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_IUCOPYB(M, N, A, LDA, Y, X, BUFFER)
#define ICOPYR_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_IUCOPYR(M, N, A, LDA, Y, X, BUFFER)
#define ICOPYI_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_IUCOPYI(M, N, A, LDA, Y, X, BUFFER)
#else
#define ICOPYB_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_ILCOPYB(M, N, A, LDA, Y, X, BUFFER)
#define ICOPYR_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_ILCOPYR(M, N, A, LDA, Y, X, BUFFER)
#define ICOPYI_OPERATION(M, N, A, LDA, X, Y, BUFFER) HEMM3M_ILCOPYI(M, N, A, LDA, Y, X, BUFFER)
#endif
#endif
43 
/*
 * OCOPY{B,R,I}_OPERATION -- defined only when RSIDE IS defined.
 *
 * Each macro forwards to the matching HEMM3M_O?COPY{B,R,I} routine:
 * the HEMM3M_OU* family when LOWER is not defined, the HEMM3M_OL* family
 * when it is.
 *
 * Argument order matters: the macro takes
 *     (M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER)
 * but the routine is called as
 *     (M, N, A, LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
 * i.e. the two position arguments are swapped and moved in front of the
 * alpha pair.
 *
 * NOTE(review): the B / R / I suffixes presumably select the "both",
 * "real" and "imaginary" packing variants used by the 3M scheme -- confirm
 * against the HEMM3M_* kernel definitions.
 */
#ifdef RSIDE
#ifndef LOWER
#define OCOPYB_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \
	HEMM3M_OUCOPYB(M, N, A,  LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
#define OCOPYR_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \
	HEMM3M_OUCOPYR(M, N, A,  LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
#define OCOPYI_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \
	HEMM3M_OUCOPYI(M, N, A,  LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
#else
#define OCOPYB_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \
	HEMM3M_OLCOPYB(M, N, A,  LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
#define OCOPYR_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \
	HEMM3M_OLCOPYR(M, N, A,  LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
#define OCOPYI_OPERATION(M, N, A, LDA, ALPHA_R, ALPHA_I, X, Y, BUFFER) \
	HEMM3M_OLCOPYI(M, N, A,  LDA, Y, X, ALPHA_R, ALPHA_I, BUFFER)
#endif
#endif
61 
/*
 * Select the inner dimension K and the driver entry-point name from the
 * RSIDE / LOWER build flags:
 *
 *   RSIDE   LOWER   K           GEMM3M_LOCAL
 *   ------  ------  ----------  ------------
 *   no      no      args -> m   HEMM3M_LU
 *   no      yes     args -> m   HEMM3M_LL
 *   yes     no      args -> n   HEMM3M_RU
 *   yes     yes     args -> n   HEMM3M_RL
 *
 * K expands to a member access on a pointer named `args`, so it is only
 * usable where such a pointer is in scope.
 */
#ifndef RSIDE
#define K		args -> m
#ifndef LOWER
#define GEMM3M_LOCAL    HEMM3M_LU
#else
#define GEMM3M_LOCAL    HEMM3M_LL
#endif
#else
#define K		args -> n
#ifndef LOWER
#define GEMM3M_LOCAL    HEMM3M_RU
#else
#define GEMM3M_LOCAL    HEMM3M_RL
#endif
#endif
77 
78 #ifdef THREADED_LEVEL3
79 #include "level3_gemm3m_thread.c"
80 #else
81 #include "gemm3m_level3.c"
82 #endif
83