/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
38
39 #include <stdio.h>
40 #include "common.h"
41
/*
 * KERNEL_FUNC: name of the HER2K kernel routine that the KERNEL_OPERATION
 * macros in this file expand to.
 *
 *   <p>her2k_kernel_<T><V>
 *     <p> : x if XDOUBLE is defined, otherwise z if DOUBLE is defined,
 *           otherwise c
 *     <T> : L if LOWER is defined, otherwise U
 *     <V> : C if CONJ  is defined, otherwise N
 */
#if defined(XDOUBLE)

#  if defined(LOWER) && defined(CONJ)
#    define KERNEL_FUNC xher2k_kernel_LC
#  elif defined(LOWER)
#    define KERNEL_FUNC xher2k_kernel_LN
#  elif defined(CONJ)
#    define KERNEL_FUNC xher2k_kernel_UC
#  else
#    define KERNEL_FUNC xher2k_kernel_UN
#  endif

#elif defined(DOUBLE)

#  if defined(LOWER) && defined(CONJ)
#    define KERNEL_FUNC zher2k_kernel_LC
#  elif defined(LOWER)
#    define KERNEL_FUNC zher2k_kernel_LN
#  elif defined(CONJ)
#    define KERNEL_FUNC zher2k_kernel_UC
#  else
#    define KERNEL_FUNC zher2k_kernel_UN
#  endif

#else

#  if defined(LOWER) && defined(CONJ)
#    define KERNEL_FUNC cher2k_kernel_LC
#  elif defined(LOWER)
#    define KERNEL_FUNC cher2k_kernel_LN
#  elif defined(CONJ)
#    define KERNEL_FUNC cher2k_kernel_UC
#  else
#    define KERNEL_FUNC cher2k_kernel_UN
#  endif

#endif
83
/*
 * KERNEL_OPERATION / KERNEL_OPERATION_C: call KERNEL_FUNC on the tile of C
 * whose first entry is at row X, column Y.
 *
 *   M, N, K  : forwarded unchanged to KERNEL_FUNC.
 *   ALPHA    : pointer to two FLOATs; ALPHA[0] and ALPHA[1] are passed as
 *              separate scalar arguments.  KERNEL_OPERATION_C passes
 *              -ALPHA[1] instead, i.e. the second component negated.
 *   SA, SB   : forwarded unchanged to KERNEL_FUNC.
 *   C, LDC   : base pointer and leading dimension of C; the tile address is
 *              (FLOAT *)C + (X + Y * LDC) * COMPSIZE.
 *   X, Y     : row / column of the tile; X - Y is passed as the tile's
 *              offset relative to the diagonal.
 *   FLAG     : forwarded unchanged to KERNEL_FUNC.
 *
 * Every argument that takes part in an expression is parenthesized so that
 * callers may pass expressions (e.g. "ldc + 1", "alpha + 2") safely.
 */
#define KERNEL_OPERATION(M, N, K, ALPHA, SA, SB, C, LDC, X, Y, FLAG) \
	KERNEL_FUNC(M, N, K, (ALPHA)[0], (ALPHA)[1], SA, SB, (FLOAT *)(C) + ((X) + (Y) * (LDC)) * COMPSIZE, LDC, (X) - (Y), FLAG)

#define KERNEL_OPERATION_C(M, N, K, ALPHA, SA, SB, C, LDC, X, Y, FLAG) \
	KERNEL_FUNC(M, N, K, (ALPHA)[0], -(ALPHA)[1], SA, SB, (FLOAT *)(C) + ((X) + (Y) * (LDC)) * COMPSIZE, LDC, (X) - (Y), FLAG)
89
/*
 * SYRK_LOCAL: HER2K_<T><V>, where <T> is L if LOWER is defined (else U) and
 * <V> is C if TRANS is defined (else N).
 * NOTE(review): not referenced in this file; presumably consumed by the
 * driver template included at the bottom -- confirm there.
 */
#if defined(LOWER)
#  if defined(TRANS)
#    define SYRK_LOCAL HER2K_LC
#  else
#    define SYRK_LOCAL HER2K_LN
#  endif
#else
#  if defined(TRANS)
#    define SYRK_LOCAL HER2K_UC
#  else
#    define SYRK_LOCAL HER2K_UN
#  endif
#endif
99
/*
 * Rebind SCAL_K for this file: drop any earlier definition and point it at
 * QSCAL_K (XDOUBLE), DSCAL_K (DOUBLE without XDOUBLE) or SSCAL_K (neither).
 * NOTE(review): the Q/D/S names are presumably the real-valued SCAL kernels
 * of the matching precision -- confirm in common.h.
 */
#ifdef SCAL_K
#undef SCAL_K
#endif

#if !defined(XDOUBLE) && !defined(DOUBLE)
#define SCAL_K SSCAL_K
#elif !defined(XDOUBLE)
#define SCAL_K DSCAL_K
#else
#define SCAL_K QSCAL_K
#endif
109
/*
 * syrk_beta: scale the stored triangle of a rectangular piece of C.
 *
 * The piece covers rows [m_from, m_to) and columns [n_from, n_to).  Entry
 * (row, col) is addressed as c + (row + col * ldc) * COMPSIZE.  The ranges
 * are first clipped against the stored triangle (upper by default, lower
 * when LOWER is defined).  Then, column by column, the part of the column
 * that lies inside the triangle is handed to SCAL_K with factor alpha[0] and
 * unit stride, and the second FLOAT of every diagonal entry inside the piece
 * is overwritten with ZERO.
 *
 * Only alpha[0] is read.  Always returns 0.
 *
 * NOTE(review): SCAL_K is presumably an in-place "x := factor * x" kernel and
 * the second FLOAT of an entry presumably its imaginary part -- confirm.
 */
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {

  BLASLONG col, rows, cols, len, skew;

  /* Clip the rectangle so that it only spans the stored triangle. */
#ifdef LOWER
  if (n_from > m_from) m_from = n_from;
  if (n_to   > m_to  ) n_to   = m_to;
#else
  if (n_from < m_from) n_from = m_from;
  if (n_to   < m_to  ) m_to   = n_to;
#endif

  rows = m_to - m_from;
  cols = n_to - n_from;

  /* Distance between the first row and the first column of the clipped
     piece; never negative after the clipping above. */
#ifdef LOWER
  skew = m_from - n_from;
#else
  skew = n_from - m_from;
#endif

  c += (m_from + n_from * ldc) * COMPSIZE;

  for (col = 0; col < cols; col++) {

#ifdef LOWER

    /* Entries from the current position down to the last row, at most a
       full column of the piece. */
    len = rows - col + skew;
    if (len > rows) len = rows;

    SCAL_K(len * COMPSIZE, 0, 0, alpha[0], c, 1, NULL, 0, NULL, 0);

    if (col >= skew) {
      /* c sits on the diagonal: clear its second component, then step one
         row down and one column right to the next diagonal entry. */
      c[1] = ZERO;
      c += (ldc + 1) * COMPSIZE;
    } else {
      /* Diagonal is still above the first row: only change column. */
      c += ldc * COMPSIZE;
    }

#else

    /* Entries from the first row down to and including the diagonal, at
       most a full column of the piece. */
    len = skew + col + 1;
    if (len > rows) len = rows;

    SCAL_K(len * COMPSIZE, 0, 0, alpha[0], c, 1, NULL, 0, NULL, 0);

    /* Clear the second component of the diagonal entry when it lies inside
       the piece. */
    if (skew + col < rows) {
      c[(skew + col) * COMPSIZE + 1] = ZERO;
    }

    c += ldc * COMPSIZE;

#endif

  }

  return 0;
}
155
156 #ifdef THREADED_LEVEL3
157 #include "level3_syr2k_threaded.c"
158 #else
159 #include "level3_syr2k.c"
160 #endif
161