1 /*********************************************************************/
2 /* */
3 /* Optimized BLAS libraries */
4 /* By Kazushige Goto <kgoto@tacc.utexas.edu> */
5 /* */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION. */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of */
16 /* profits, interruption of business, or related expenses which may */
17 /* arise from use of Software or Documentation, including but not */
18 /* limited to those resulting from defects in Software and/or */
19 /* Documentation, or loss or inaccuracy of data of any kind. */
20 /*********************************************************************/
21
22 #include <stdio.h>
23 #include <ctype.h>
24 #include "common.h"
25 #ifdef FUNCTION_PROFILE
26 #include "functable.h"
27 #endif
28
29 #ifdef XDOUBLE
30 #define ERROR_NAME "XHPR2 "
31 #elif defined(DOUBLE)
32 #define ERROR_NAME "ZHPR2 "
33 #else
34 #define ERROR_NAME "CHPR2 "
35 #endif
36
37 static int (*hpr2[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = {
38 #ifdef XDOUBLE
39 xhpr2_U, xhpr2_L, xhpr2_V, xhpr2_M,
40 #elif defined(DOUBLE)
41 zhpr2_U, zhpr2_L, zhpr2_V, zhpr2_M,
42 #else
43 chpr2_U, chpr2_L, chpr2_V, chpr2_M,
44 #endif
45 };
46
47 #ifdef SMP
48 static int (*hpr2_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = {
49 #ifdef XDOUBLE
50 xhpr2_thread_U, xhpr2_thread_L, xhpr2_thread_V, xhpr2_thread_M,
51 #elif defined(DOUBLE)
52 zhpr2_thread_U, zhpr2_thread_L, zhpr2_thread_V, zhpr2_thread_M,
53 #else
54 chpr2_thread_U, chpr2_thread_L, chpr2_thread_V, chpr2_thread_M,
55 #endif
56 };
57 #endif
58
59 #ifndef CBLAS
60
NAME(char * UPLO,blasint * N,FLOAT * ALPHA,FLOAT * x,blasint * INCX,FLOAT * y,blasint * INCY,FLOAT * a)61 void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
62 FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a){
63
64 char uplo_arg = *UPLO;
65 blasint n = *N;
66 FLOAT alpha_r = ALPHA[0];
67 FLOAT alpha_i = ALPHA[1];
68 blasint incx = *INCX;
69 blasint incy = *INCY;
70
71 blasint info;
72 int uplo;
73 FLOAT *buffer;
74 #ifdef SMP
75 int nthreads;
76 #endif
77
78 PRINT_DEBUG_NAME;
79
80 TOUPPER(uplo_arg);
81 uplo = -1;
82
83 if (uplo_arg == 'U') uplo = 0;
84 if (uplo_arg == 'L') uplo = 1;
85
86 info = 0;
87
88 if (incy == 0) info = 7;
89 if (incx == 0) info = 5;
90 if (n < 0) info = 2;
91 if (uplo < 0) info = 1;
92
93 if (info != 0) {
94 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
95 return;
96 }
97
98 #else
99
100 void CNAME(enum CBLAS_ORDER order,
101 enum CBLAS_UPLO Uplo,
102 blasint n,
103 FLOAT *ALPHA,
104 FLOAT *x, blasint incx,
105 FLOAT *y, blasint incy,
106 FLOAT *a) {
107
108 FLOAT alpha_r = ALPHA[0];
109 FLOAT alpha_i = ALPHA[1];
110 FLOAT *buffer;
111 int uplo;
112 blasint info;
113 #ifdef SMP
114 int nthreads;
115 #endif
116
117 PRINT_DEBUG_CNAME;
118
119 uplo = -1;
120 info = 0;
121
122 if (order == CblasColMajor) {
123 if (Uplo == CblasUpper) uplo = 0;
124 if (Uplo == CblasLower) uplo = 1;
125
126 info = -1;
127
128 if (incy == 0) info = 7;
129 if (incx == 0) info = 5;
130 if (n < 0) info = 2;
131 if (uplo < 0) info = 1;
132 }
133
134 if (order == CblasRowMajor) {
135 if (Uplo == CblasUpper) uplo = 3;
136 if (Uplo == CblasLower) uplo = 2;
137
138 info = -1;
139
140 if (incx == 0) info = 7;
141 if (incy == 0) info = 5;
142 if (n < 0) info = 2;
143 if (uplo < 0) info = 1;
144 }
145
146 if (info >= 0) {
147 BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
148 return;
149 }
150
151 #endif
152
153 if (n == 0) return;
154
155 if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
156
157
158 IDEBUG_START;
159
160 FUNCTION_PROFILE_START();
161
162 if (incx < 0 ) x -= (n - 1) * incx * 2;
163 if (incy < 0 ) y -= (n - 1) * incy * 2;
164
165 buffer = (FLOAT *)blas_memory_alloc(1);
166
167 #ifdef SMP
168 nthreads = num_cpu_avail(2);
169
170 if (nthreads == 1) {
171 #endif
172
173 (hpr2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, a, buffer);
174
175 #ifdef SMP
176 } else {
177
178 (hpr2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, buffer, nthreads);
179
180 }
181 #endif
182
183 blas_memory_free(buffer);
184
185 FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n);
186
187 IDEBUG_END;
188
189 return;
190 }
191