1 /*
2  *   Copyright (c) 2010, Michael Lehn
3  *
4  *   All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1) Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2) Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in
14  *      the documentation and/or other materials provided with the
15  *      distribution.
16  *   3) Neither the name of the FLENS development group nor the names of
17  *      its contributors may be used to endorse or promote products derived
18  *      from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifndef CXXBLAS_LEVEL2_HPMV_TCC
34 #define CXXBLAS_LEVEL2_HPMV_TCC 1
35 
36 #include <complex>
37 #include "xflens/cxxblas/cxxblas.h"
38 
39 namespace cxxblas {
40 
41 template <typename IndexType, typename ALPHA, typename MA, typename VX,
42           typename BETA, typename VY>
43 void
hpmv_generic(StorageOrder order,StorageUpLo upLo,Transpose conjugateA,IndexType n,const ALPHA & alpha,const MA * A,const VX * x,IndexType incX,const BETA & beta,VY * y,IndexType incY)44 hpmv_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
45              IndexType n,
46              const ALPHA &alpha,
47              const MA *A,
48              const VX *x, IndexType incX,
49              const BETA &beta,
50              VY *y, IndexType incY)
51 {
52     if (order==ColMajor) {
53         upLo = (upLo==Upper) ? Lower : Upper;
54         conjugateA = Transpose(conjugateA^Conj);
55     }
56     scal_init_generic(n, beta, y, incY);
57     if (upLo==Upper) {
58         if (conjugateA==Conj) {
59             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
60                 y[iY] += alpha*cxxblas::real(A[i*(2*n-i+1)/2]) * x[iX];
61 
62                 VY y_ = VY(0);
63                 dot_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
64                                    x+iX+incX, incX, y_);
65                 y[iY] += alpha*y_;
66                 axpy_generic(n-i-1, alpha*x[iX],
67                                     A+i*(2*n-i+1)/2+1, IndexType(1),
68                                     y+iY+incY, incY);
69             }
70         } else {
71             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
72                 y[iY] += alpha*cxxblas::real(A[i*(2*n-i+1)/2]) * x[iX];
73 
74                 VY y_ = VY(0);
75                 dotu_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
76                                     x+iX+incX, incX, y_);
77                 y[iY] += alpha*y_;
78                 acxpy_generic(n-i-1, alpha*x[iX],
79                                      A+i*(2*n-i+1)/2+1, IndexType(1),
80                                      y+iY+incY, incY);
81             }
82         }
83     } else {
84         if (conjugateA==Conj) {
85             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
86                 y[iY] += alpha*cxxblas::real(A[i+i*(i+1)/2]) * x[iX];
87 
88                 VY y_ = VY(0);
89                 dot_generic(i, A+i*(i+1)/2, IndexType(1), x, incX, y_);
90                 y[iY] += alpha*y_;
91                 axpy_generic(i, alpha*x[iX],
92                                 A+i*(i+1)/2, IndexType(1),
93                                 y, incY);
94             }
95         } else {
96             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
97                 y[iY] += alpha*cxxblas::real(A[i+i*(i+1)/2]) * x[iX];
98 
99                 VY y_ = VY(0);
100                 dotu_generic(i, A+i*(i+1)/2, IndexType(1), x, incX, y_);
101                 y[iY] += alpha*y_;
102                 acxpy_generic(i, alpha*x[iX],
103                                  A+i*(i+1)/2, IndexType(1),
104                                  y, incY);
105             }
106         }
107     }
108 }
109 
110 //------------------------------------------------------------------------------
111 
112 template <typename IndexType, typename ALPHA, typename MA, typename VX,
113           typename BETA, typename VY>
114 void
hpmv(StorageOrder order,StorageUpLo upLo,IndexType n,const ALPHA & alpha,const MA * A,const VX * x,IndexType incX,const BETA & beta,VY * y,IndexType incY)115 hpmv(StorageOrder order, StorageUpLo upLo,
116      IndexType n,
117      const ALPHA &alpha,
118      const MA *A,
119      const VX *x, IndexType incX,
120      const BETA &beta,
121      VY *y, IndexType incY)
122 {
123     CXXBLAS_DEBUG_OUT("hpmv_generic");
124 
125     if (incX<0) {
126         x -= incX*(n-1);
127     }
128     if (incY<0) {
129         y -= incY*(n-1);
130     }
131     hpmv_generic(order, upLo, NoTrans, n, alpha, A, x, incX, beta, y, incY);
132 }
133 
134 
135 #ifdef HAVE_CBLAS
136 
137 // chpmv
138 template <typename IndexType>
139 typename If<IndexType>::isBlasCompatibleInteger
hpmv(StorageOrder order,StorageUpLo upLo,IndexType n,const ComplexFloat & alpha,const ComplexFloat * A,const ComplexFloat * x,IndexType incX,const ComplexFloat & beta,ComplexFloat * y,IndexType incY)140 hpmv(StorageOrder order, StorageUpLo upLo,
141      IndexType n,
142      const ComplexFloat &alpha,
143      const ComplexFloat *A,
144      const ComplexFloat *x, IndexType incX,
145      const ComplexFloat &beta,
146      ComplexFloat *y, IndexType incY)
147 {
148     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chpmv");
149 
150     cblas_chpmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
151                 reinterpret_cast<const float *>(&alpha),
152                 reinterpret_cast<const float *>(A),
153                 reinterpret_cast<const float *>(x), incX,
154                 reinterpret_cast<const float *>(&beta),
155                 reinterpret_cast<float *>(y), incY);
156 }
157 
158 // zhpmv
159 template <typename IndexType>
160 typename If<IndexType>::isBlasCompatibleInteger
hpmv(StorageOrder order,StorageUpLo upLo,IndexType n,const ComplexDouble & alpha,const ComplexDouble * A,const ComplexDouble * x,IndexType incX,const ComplexDouble & beta,ComplexDouble * y,IndexType incY)161 hpmv(StorageOrder order, StorageUpLo upLo,
162      IndexType n,
163      const ComplexDouble &alpha,
164      const ComplexDouble *A,
165      const ComplexDouble *x, IndexType incX,
166      const ComplexDouble &beta,
167      ComplexDouble *y, IndexType incY)
168 {
169     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhpmv");
170 
171     cblas_zhpmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
172                 reinterpret_cast<const double *>(&alpha),
173                 reinterpret_cast<const double *>(A),
174                 reinterpret_cast<const double *>(x), incX,
175                 reinterpret_cast<const double *>(&beta),
176                 reinterpret_cast<double *>(y), incY);
177 }
178 
179 #endif // HAVE_CBLAS
180 
181 
182 } // namespace cxxblas
183 
184 #endif // CXXBLAS_LEVEL2_HPMV_TCC
185