1 /*********************************************************************/
2 /*                                                                   */
3 /*             Optimized BLAS libraries                              */
4 /*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
5 /*                                                                   */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of     */
16 /* profits, interruption of business, or related expenses which may  */
17 /* arise from use of Software or Documentation, including but not    */
18 /* limited to those resulting from defects in Software and/or        */
19 /* Documentation, or loss or inaccuracy of data of any kind.         */
20 /*********************************************************************/
21 
22 #include <stdio.h>
23 #include <ctype.h>
24 #include "common.h"
25 #ifdef FUNCTION_PROFILE
26 #include "functable.h"
27 #endif
28 
29 #ifdef XDOUBLE
30 #define ERROR_NAME "XHPR2 "
31 #elif defined(DOUBLE)
32 #define ERROR_NAME "ZHPR2 "
33 #else
34 #define ERROR_NAME "CHPR2 "
35 #endif
36 
37 static int (*hpr2[])(BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *) = {
38 #ifdef XDOUBLE
39   xhpr2_U, xhpr2_L, xhpr2_V, xhpr2_M,
40 #elif defined(DOUBLE)
41   zhpr2_U, zhpr2_L, zhpr2_V, zhpr2_M,
42 #else
43   chpr2_U, chpr2_L, chpr2_V, chpr2_M,
44 #endif
45 };
46 
47 #ifdef SMP
48 static int (*hpr2_thread[])(BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, FLOAT *, int) = {
49 #ifdef XDOUBLE
50   xhpr2_thread_U, xhpr2_thread_L, xhpr2_thread_V, xhpr2_thread_M,
51 #elif defined(DOUBLE)
52   zhpr2_thread_U, zhpr2_thread_L, zhpr2_thread_V, zhpr2_thread_M,
53 #else
54   chpr2_thread_U, chpr2_thread_L, chpr2_thread_V, chpr2_thread_M,
55 #endif
56 };
57 #endif
58 
59 #ifndef CBLAS
60 
NAME(char * UPLO,blasint * N,FLOAT * ALPHA,FLOAT * x,blasint * INCX,FLOAT * y,blasint * INCY,FLOAT * a)61 void NAME(char *UPLO, blasint *N, FLOAT  *ALPHA,
62 	 FLOAT  *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *a){
63 
64   char uplo_arg = *UPLO;
65   blasint n		= *N;
66   FLOAT alpha_r	= ALPHA[0];
67   FLOAT alpha_i	= ALPHA[1];
68   blasint incx	= *INCX;
69   blasint incy	= *INCY;
70 
71   blasint info;
72   int uplo;
73   FLOAT *buffer;
74 #ifdef SMP
75   int nthreads;
76 #endif
77 
78   PRINT_DEBUG_NAME;
79 
80   TOUPPER(uplo_arg);
81   uplo  = -1;
82 
83   if (uplo_arg  == 'U') uplo  = 0;
84   if (uplo_arg  == 'L') uplo  = 1;
85 
86   info = 0;
87 
88   if (incy == 0)          info =  7;
89   if (incx == 0)          info =  5;
90   if (n < 0)              info =  2;
91   if (uplo  < 0)          info =  1;
92 
93   if (info != 0) {
94     BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
95     return;
96   }
97 
98 #else
99 
100 void CNAME(enum CBLAS_ORDER order,
101 	   enum CBLAS_UPLO Uplo,
102 	   blasint n,
103 	   FLOAT *ALPHA,
104 	   FLOAT  *x, blasint incx,
105 	   FLOAT  *y, blasint incy,
106 	   FLOAT  *a) {
107 
108   FLOAT alpha_r	= ALPHA[0];
109   FLOAT alpha_i	= ALPHA[1];
110   FLOAT *buffer;
111   int uplo;
112   blasint info;
113 #ifdef SMP
114   int nthreads;
115 #endif
116 
117   PRINT_DEBUG_CNAME;
118 
119   uplo = -1;
120   info =  0;
121 
122   if (order == CblasColMajor) {
123     if (Uplo == CblasUpper)         uplo  = 0;
124     if (Uplo == CblasLower)         uplo  = 1;
125 
126     info = -1;
127 
128     if (incy == 0)          info =  7;
129     if (incx == 0)          info =  5;
130     if (n < 0)              info =  2;
131     if (uplo  < 0)          info =  1;
132   }
133 
134   if (order == CblasRowMajor) {
135     if (Uplo == CblasUpper)         uplo  = 3;
136     if (Uplo == CblasLower)         uplo  = 2;
137 
138     info = -1;
139 
140     if (incx == 0)          info =  7;
141     if (incy == 0)          info =  5;
142     if (n < 0)              info =  2;
143     if (uplo  < 0)          info =  1;
144   }
145 
146   if (info >= 0) {
147     BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
148     return;
149   }
150 
151 #endif
152 
153   if (n == 0) return;
154 
155   if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;
156 
157 
158   IDEBUG_START;
159 
160   FUNCTION_PROFILE_START();
161 
162   if (incx < 0 ) x -= (n - 1) * incx * 2;
163   if (incy < 0 ) y -= (n - 1) * incy * 2;
164 
165   buffer = (FLOAT *)blas_memory_alloc(1);
166 
167 #ifdef SMP
168   nthreads = num_cpu_avail(2);
169 
170   if (nthreads == 1) {
171 #endif
172 
173     (hpr2[uplo])(n, alpha_r, alpha_i, x, incx, y, incy, a, buffer);
174 
175 #ifdef SMP
176   } else {
177 
178     (hpr2_thread[uplo])(n, ALPHA, x, incx, y, incy, a, buffer, nthreads);
179 
180   }
181 #endif
182 
183   blas_memory_free(buffer);
184 
185   FUNCTION_PROFILE_END(4, n * n / 2 + 2 * n, 2 * n * n);
186 
187   IDEBUG_END;
188 
189   return;
190 }
191