1 /*
2  *   Copyright (c) 2009, Michael Lehn
3  *
4  *   All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1) Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2) Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in
14  *      the documentation and/or other materials provided with the
15  *      distribution.
16  *   3) Neither the name of the FLENS development group nor the names of
17  *      its contributors may be used to endorse or promote products derived
18  *      from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifndef CXXBLAS_LEVEL2_HER2_TCC
34 #define CXXBLAS_LEVEL2_HER2_TCC 1
35 
36 #include <complex>
37 #include "xflens/cxxblas/cxxblas.h"
38 
39 namespace cxxblas {
40 
41 template <typename IndexType, typename ALPHA, typename VX, typename VY,
42           typename MA>
43 void
her2_generic(StorageOrder order,StorageUpLo upLo,Transpose conjugateA,IndexType n,const ALPHA & alpha,const VX * x,IndexType incX,const VY * y,IndexType incY,MA * A,IndexType ldA)44 her2_generic(StorageOrder order,  StorageUpLo upLo, Transpose conjugateA,
45              IndexType n,
46              const ALPHA &alpha,
47              const VX *x, IndexType incX,
48              const VY *y, IndexType incY,
49              MA *A, IndexType ldA)
50 {
51     if (alpha==ALPHA(0)) {
52         return;
53     }
54     if (order==ColMajor) {
55         upLo = (upLo==Upper) ? Lower : Upper;
56         conjugateA = Transpose(conjugateA^Conj);
57         her2_generic(RowMajor, upLo, conjugateA,
58                      n, alpha, x, incX, y, incY,
59                      A, ldA);
60         return;
61     }
62     #ifdef CXXBLAS_USE_XERBLA
63         // insert error check here
64     #endif
65     if (upLo==Upper) {
66         if (conjugateA==Conj) {
67             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
68                 axpy_generic(n-i, conjugate(alpha*x[iX]),
69                                   y+iY, incY,
70                                   A+i*(ldA+1), IndexType(1));
71                 axpy_generic(n-i, alpha*conjugate(y[iY]),
72                                   x+iX, incX,
73                                   A+i*(ldA+1), IndexType(1));
74             }
75         } else {
76             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
77                 acxpy_generic(n-i, alpha*x[iX],
78                                    y+iY, incY,
79                                    A+i*(ldA+1), IndexType(1));
80                 acxpy_generic(n-i, conjugate(alpha)*y[iY],
81                                    x+iX, incX,
82                                    A+i*(ldA+1), IndexType(1));
83             }
84         }
85     } else {
86         if (conjugateA==Conj) {
87             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
88                 axpy_generic(i+1, conjugate(alpha*x[iX]),
89                                   y, incY,
90                                   A+i*ldA, IndexType(1));
91                 axpy_generic(i+1, alpha*conjugate(y[iY]),
92                                   x, incX,
93                                   A+i*ldA, IndexType(1));
94             }
95         } else {
96             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
97                 acxpy_generic(i+1, alpha*x[iX],
98                                    y, incY,
99                                    A+i*ldA, IndexType(1));
100                 acxpy_generic(i+1, conjugate(alpha)*y[iY],
101                                    x, incX,
102                                    A+i*ldA, IndexType(1));
103             }
104         }
105     }
106     for (IndexType i=0; i<n; ++i) {
107         A[i*ldA+i] = cxxblas::real(A[i*ldA+i]);
108     }
109 }
110 
111 template <typename IndexType, typename ALPHA, typename VX, typename VY,
112           typename MA>
113 void
her2(StorageOrder order,StorageUpLo upLo,IndexType n,const ALPHA & alpha,const VX * x,IndexType incX,const VY * y,IndexType incY,MA * A,IndexType ldA)114 her2(StorageOrder order,  StorageUpLo upLo,
115      IndexType n,
116      const ALPHA &alpha,
117      const VX *x, IndexType incX,
118      const VY *y, IndexType incY,
119      MA *A, IndexType ldA)
120 {
121     CXXBLAS_DEBUG_OUT("her2_generic");
122 
123     if (incX<0) {
124         x -= incX*(n-1);
125     }
126     if (incY<0) {
127         y -= incY*(n-1);
128     }
129     her2_generic(order, upLo, NoTrans, n, alpha, x, incX, y, incY, A, ldA);
130 }
131 
132 
133 #ifdef HAVE_CBLAS
134 
135 // cgerc
136 template <typename IndexType>
137 typename If<IndexType>::isBlasCompatibleInteger
her2(StorageOrder order,StorageUpLo upLo,IndexType n,const ComplexFloat & alpha,const ComplexFloat * x,IndexType incX,const ComplexFloat * y,IndexType incY,ComplexFloat * A,IndexType ldA)138 her2(StorageOrder order,   StorageUpLo upLo,
139       IndexType n,
140       const ComplexFloat &alpha,
141       const ComplexFloat *x, IndexType incX,
142       const ComplexFloat *y, IndexType incY,
143       ComplexFloat *A, IndexType ldA)
144 {
145     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cher2");
146 
147     cblas_cher2(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
148                 n,
149                 reinterpret_cast<const float *>(&alpha),
150                 reinterpret_cast<const float *>(x), incX,
151                 reinterpret_cast<const float *>(y), incY,
152                 reinterpret_cast<float *>(A), ldA);
153 }
154 
155 // zgerc
156 template <typename IndexType>
157 typename If<IndexType>::isBlasCompatibleInteger
her2(StorageOrder order,StorageUpLo upLo,IndexType n,const ComplexDouble & alpha,const ComplexDouble * x,IndexType incX,const ComplexDouble * y,IndexType incY,ComplexDouble * A,IndexType ldA)158 her2(StorageOrder order, StorageUpLo upLo,
159       IndexType n,
160       const ComplexDouble &alpha,
161       const ComplexDouble *x, IndexType incX,
162       const ComplexDouble *y, IndexType incY,
163       ComplexDouble *A, IndexType ldA)
164 {
165     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zher2");
166 
167     cblas_zher2(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
168                 n,
169                 reinterpret_cast<const double *>(&alpha),
170                 reinterpret_cast<const double *>(x), incX,
171                 reinterpret_cast<const double *>(y), incY,
172                 reinterpret_cast<double *>(A), ldA);
173 }
174 
175 #endif // HAVE_CBLAS
176 
177 } // namespace cxxblas
178 
179 #endif // CXXBLAS_LEVEL2_HER2_TCC
180