1 /*********************************************************************/
2 /*                                                                   */
3 /*             Optimized BLAS libraries                              */
4 /*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
5 /*                                                                   */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of     */
16 /* profits, interruption of business, or related expenses which may  */
17 /* arise from use of Software or Documentation, including but not    */
18 /* limited to those resulting from defects in Software and/or        */
19 /* Documentation, or loss or inaccuracy of data of any kind.         */
20 /*********************************************************************/
21 
22 #include <stdio.h>
23 #include "common.h"
24 
/*
 * Pick the HER2K inner kernel this translation unit will call.
 *
 *   prefix  x / z / c : XDOUBLE defined / DOUBLE defined / neither
 *                       (XDOUBLE takes precedence over DOUBLE)
 *   suffix  U / L     : LOWER undefined / defined
 *   suffix  N / C     : CONJ  undefined / defined
 */
#if defined(XDOUBLE)

#  if   !defined(LOWER) && !defined(CONJ)
#    define KERNEL_FUNC xher2k_kernel_UN
#  elif !defined(LOWER)
#    define KERNEL_FUNC xher2k_kernel_UC
#  elif !defined(CONJ)
#    define KERNEL_FUNC xher2k_kernel_LN
#  else
#    define KERNEL_FUNC xher2k_kernel_LC
#  endif

#elif defined(DOUBLE)

#  if   !defined(LOWER) && !defined(CONJ)
#    define KERNEL_FUNC zher2k_kernel_UN
#  elif !defined(LOWER)
#    define KERNEL_FUNC zher2k_kernel_UC
#  elif !defined(CONJ)
#    define KERNEL_FUNC zher2k_kernel_LN
#  else
#    define KERNEL_FUNC zher2k_kernel_LC
#  endif

#else

#  if   !defined(LOWER) && !defined(CONJ)
#    define KERNEL_FUNC cher2k_kernel_UN
#  elif !defined(LOWER)
#    define KERNEL_FUNC cher2k_kernel_UC
#  elif !defined(CONJ)
#    define KERNEL_FUNC cher2k_kernel_LN
#  else
#    define KERNEL_FUNC cher2k_kernel_LC
#  endif

#endif
66 
/*
 * Invoke KERNEL_FUNC on the tile of C whose first element is at row X,
 * column Y (leading dimension LDC, COMPSIZE FLOATs per element).
 *
 * ALPHA is indexed as a two-element array; its components are passed as
 * two separate scalar arguments.  (X) - (Y) is forwarded as the tenth
 * argument -- presumably the tile's offset from the diagonal; confirm
 * against the kernel's prototype.
 */
#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y, FLAG) \
	KERNEL_FUNC(M, N, K, \
		    ALPHA[0], ALPHA[1], \
		    SA, SB, \
		    (FLOAT *)(C) + ((X) + (Y) * LDC) * COMPSIZE, LDC, \
		    (X) - (Y), FLAG)

/*
 * Same call as KERNEL_OPERATION, except that the second component of
 * ALPHA is negated before being passed on (the conjugate of ALPHA, if
 * ALPHA holds {real, imaginary}).
 */
#define KERNEL_OPERATION_C(M, N, K, ALPHA, SA, SB, C, LDC, X, Y, FLAG) \
	KERNEL_FUNC(M, N, K, \
		    ALPHA[0], -ALPHA[1], \
		    SA, SB, \
		    (FLOAT *)(C) + ((X) + (Y) * LDC) * COMPSIZE, LDC, \
		    (X) - (Y), FLAG)
72 
/*
 * Name of the driver variant for this configuration:
 * U/L follows LOWER (undefined/defined), N/C follows TRANS
 * (undefined/defined).
 */
#ifdef LOWER
#  ifdef TRANS
#    define SYRK_LOCAL    HER2K_LC
#  else
#    define SYRK_LOCAL    HER2K_LN
#  endif
#else
#  ifdef TRANS
#    define SYRK_LOCAL    HER2K_UC
#  else
#    define SYRK_LOCAL    HER2K_UN
#  endif
#endif
82 
/*
 * Discard any earlier SCAL_K definition and rebind it by precision:
 * QSCAL_K for XDOUBLE, DSCAL_K for DOUBLE, SSCAL_K otherwise.
 * syrk_beta() in this file calls it with lengths multiplied by COMPSIZE
 * and a single scalar factor -- presumably these are the real-valued
 * scal kernels; confirm in common.h.
 */
#undef SCAL_K

#if !defined(XDOUBLE) && !defined(DOUBLE)
#define SCAL_K		SSCAL_K
#elif defined(XDOUBLE)
#define SCAL_K		QSCAL_K
#else
#define SCAL_K		DSCAL_K
#endif
92 
/*
 * syrk_beta: pre-scale the stored triangle of C inside the window
 * rows [m_from, m_to) x columns [n_from, n_to).
 *
 *   m_from, m_to  row range of the window (half-open)
 *   n_from, n_to  column range of the window (half-open)
 *   alpha         only alpha[0] is read; it is the scalar handed to SCAL_K.
 *                 NOTE(review): the function name suggests callers pass the
 *                 BLAS "beta" here -- confirm in the included driver.
 *   c             base of the matrix, COMPSIZE FLOATs per element,
 *                 column-major with leading dimension ldc
 *   ldc           leading dimension of c, in elements
 *
 * The window is first clipped to the triangle selected by LOWER.  Then, for
 * every remaining column, the part of the column that lies in that triangle
 * is passed to SCAL_K as a run of (length * COMPSIZE) FLOATs, and -- when
 * the diagonal element of that column is inside the window -- the second
 * FLOAT of the diagonal element (its imaginary part, given interleaved
 * complex storage) is set to ZERO.
 *
 * Always returns 0.
 */
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {

  BLASLONG rows, cols, shift, j;
  FLOAT *col;

  /* Clip the window so that it only touches the selected triangle. */
#ifndef LOWER
  if (n_from < m_from) n_from = m_from;
  if (n_to   < m_to  ) m_to   = n_to;
#else
  if (n_from > m_from) m_from = n_from;
  if (n_to   > m_to  ) n_to   = m_to;
#endif

  rows = m_to - m_from;
  cols = n_to - n_from;

  /* First element of the clipped window. */
  col = c + (m_from + n_from * ldc) * COMPSIZE;

#ifndef LOWER

  /* Row index, relative to the window top, of the first column's diagonal. */
  shift = n_from - m_from;

  for (j = 0; j < cols; j++, col += ldc * COMPSIZE) {

    /* Number of rows from the window top down to and including the diagonal. */
    BLASLONG upto = j + shift + 1;

    SCAL_K(MIN(upto, rows) * COMPSIZE, 0, 0, alpha[0], col, 1, NULL, 0, NULL, 0);

    /* The diagonal element is inside the window only if upto fits in rows. */
    if (upto <= rows)
      col[(j + shift) * COMPSIZE + 1] = ZERO;
  }

#else

  /* Number of leading columns whose diagonal lies above the window top. */
  shift = m_from - n_from;

  for (j = 0; j < cols; j++) {

    SCAL_K(MIN(rows - j + shift, rows) * COMPSIZE, 0, 0, alpha[0], col, 1, NULL, 0, NULL, 0);

    if (j >= shift) {
      /* col sits on the diagonal: clear its second component, then
         step down one row so the next column also starts on the diagonal. */
      col[1] = ZERO;
      col += COMPSIZE;
    }

    col += ldc * COMPSIZE;
  }

#endif

  return 0;
}
138 
139 #ifdef THREADED_LEVEL3
140 #include "level3_syr2k_threaded.c"
141 #else
142 #include "level3_syr2k.c"
143 #endif
144