1 /*********************************************************************/
2 /* */
3 /* Optimized BLAS libraries */
4 /* By Kazushige Goto <kgoto@tacc.utexas.edu> */
5 /* */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION. */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of */
16 /* profits, interruption of business, or related expenses which may */
17 /* arise from use of Software or Documentation, including but not */
18 /* limited to those resulting from defects in Software and/or */
19 /* Documentation, or loss or inaccuracy of data of any kind. */
20 /*********************************************************************/
21
22 #include <stdio.h>
23 #include "common.h"
24
/*
 * Pick the inner kernel symbol for this build of the template.
 *
 * The symbol is spelled <p>her2k_kernel_<T><O>, where
 *   <p> is x when XDOUBLE is defined, z when only DOUBLE is defined,
 *       and c otherwise;
 *   <T> is L when LOWER is defined and U otherwise;
 *   <O> is C when CONJ is defined and N otherwise.
 */
#if !defined(LOWER) && !defined(CONJ)

#if defined(XDOUBLE)
#define KERNEL_FUNC xher2k_kernel_UN
#elif defined(DOUBLE)
#define KERNEL_FUNC zher2k_kernel_UN
#else
#define KERNEL_FUNC cher2k_kernel_UN
#endif

#elif !defined(LOWER) && defined(CONJ)

#if defined(XDOUBLE)
#define KERNEL_FUNC xher2k_kernel_UC
#elif defined(DOUBLE)
#define KERNEL_FUNC zher2k_kernel_UC
#else
#define KERNEL_FUNC cher2k_kernel_UC
#endif

#elif defined(LOWER) && !defined(CONJ)

#if defined(XDOUBLE)
#define KERNEL_FUNC xher2k_kernel_LN
#elif defined(DOUBLE)
#define KERNEL_FUNC zher2k_kernel_LN
#else
#define KERNEL_FUNC cher2k_kernel_LN
#endif

#else /* LOWER && CONJ */

#if defined(XDOUBLE)
#define KERNEL_FUNC xher2k_kernel_LC
#elif defined(DOUBLE)
#define KERNEL_FUNC zher2k_kernel_LC
#else
#define KERNEL_FUNC cher2k_kernel_LC
#endif

#endif
66
/*
 * Invoke KERNEL_FUNC on the tile of C whose top-left element is at
 * row X, column Y (leading dimension LDC, COMPSIZE values per element).
 * ALPHA is indexed as ALPHA[0] / ALPHA[1] and passed as two separate
 * scalars; (X) - (Y) is forwarded as the tile's offset argument.
 * Every parameter is parenthesized so expression arguments (for example
 * an LDC of "ldc + 1") keep their intended precedence.
 */
#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y, FLAG) \
	KERNEL_FUNC((M), (N), (K), (ALPHA)[0], (ALPHA)[1], (SA), (SB), (FLOAT *)(C) + ((X) + (Y) * (LDC)) * COMPSIZE, (LDC), (X) - (Y), (FLAG))
69
/*
 * Same call as KERNEL_OPERATION, except that the second alpha component
 * is negated before being passed (ALPHA[0], -ALPHA[1]).
 * Every parameter is parenthesized so expression arguments (for example
 * an LDC of "ldc + 1") keep their intended precedence.
 */
#define KERNEL_OPERATION_C(M, N, K, ALPHA, SA, SB, C, LDC, X, Y, FLAG) \
	KERNEL_FUNC((M), (N), (K), (ALPHA)[0], -(ALPHA)[1], (SA), (SB), (FLOAT *)(C) + ((X) + (Y) * (LDC)) * COMPSIZE, (LDC), (X) - (Y), (FLAG))
72
/*
 * Bind SYRK_LOCAL to the HER2K_<T><O> name matching this build:
 * <T> is L when LOWER is defined (U otherwise) and <O> is C when TRANS
 * is defined (N otherwise).
 * NOTE(review): presumably consumed by the driver source included at the
 * end of this file -- confirm there.
 */
#ifdef LOWER
#ifdef TRANS
#define SYRK_LOCAL HER2K_LC
#else
#define SYRK_LOCAL HER2K_LN
#endif
#else
#ifdef TRANS
#define SYRK_LOCAL HER2K_UC
#else
#define SYRK_LOCAL HER2K_UN
#endif
#endif
82
/*
 * Drop whatever SCAL_K was bound to before this point and rebind it to
 * QSCAL_K (XDOUBLE), DSCAL_K (DOUBLE) or SSCAL_K (neither).
 * syrk_beta() below calls it with a length already multiplied by COMPSIZE
 * and a single scalar, i.e. on the column viewed as a flat FLOAT array.
 * NOTE(review): the Q/D/S names look like the real-valued scaling
 * kernels -- confirm against common.h.
 */
#undef SCAL_K

#if defined(XDOUBLE)
#define SCAL_K QSCAL_K
#elif defined(DOUBLE)
#define SCAL_K DSCAL_K
#else
#define SCAL_K SSCAL_K
#endif
92
/*
 * Scale the triangular part of C that falls inside a row/column window.
 * (Presumably the beta pre-scaling step of HER2K, judging by the name.)
 *
 * The row range [m_from, m_to) and the column range [n_from, n_to) are
 * first clipped against each other so that only columns intersecting the
 * triangle (upper unless LOWER is defined) are visited.  For every such
 * column the in-triangle rows are multiplied by alpha[0]; SCAL_K is given
 * rows * COMPSIZE contiguous values, so all components of each element are
 * scaled by the same factor.  Whenever the column's diagonal element lies
 * inside the row range, the value at offset 1 within that element is then
 * set to ZERO.
 *
 * Only alpha[0] is read.  c is the base of the matrix with leading
 * dimension ldc (in elements).  Always returns 0.
 */
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {

  BLASLONG col, rows, cols, gap;
  FLOAT *column;

  /* Clip the window to the part that can touch the triangle. */
#ifndef LOWER
  if (n_from < m_from) n_from = m_from;
  if (n_to   < m_to  ) m_to   = n_to;
#else
  if (n_from > m_from) m_from = n_from;
  if (n_to   > m_to  ) n_to   = m_to;
#endif

  rows   = m_to - m_from;
  cols   = n_to - n_from;
  column = c + (m_from + n_from * ldc) * COMPSIZE;

#ifndef LOWER

  /* Row offset (relative to m_from) of the first column's diagonal. */
  gap = n_from - m_from;

  for (col = 0; col < cols; col++) {
    BLASLONG diag = col + gap;
    BLASLONG len  = MIN(diag + 1, rows);

    SCAL_K(len * COMPSIZE, 0, 0, alpha[0], column, 1, NULL, 0, NULL, 0);

    /* Diagonal element is inside the row range: clear its second component. */
    if (diag < rows) column[diag * COMPSIZE + 1] = ZERO;

    column += ldc * COMPSIZE;
  }

#else

  /* Number of leading columns whose diagonal lies above the row range. */
  gap = m_from - n_from;

  for (col = 0; col < cols; col++) {
    BLASLONG len = MIN(rows + gap - col, rows);

    SCAL_K(len * COMPSIZE, 0, 0, alpha[0], column, 1, NULL, 0, NULL, 0);

    if (col >= gap) {
      /* column points at the diagonal element: clear its second
         component, then step one row down so the next column again
         starts on its own diagonal. */
      column[1] = ZERO;
      column += COMPSIZE;
    }

    column += ldc * COMPSIZE;
  }

#endif

  return 0;
}
138
139 #ifdef THREADED_LEVEL3
140 #include "level3_syr2k_threaded.c"
141 #else
142 #include "level3_syr2k.c"
143 #endif
144