1 /*
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_HER2_TCC
34 #define CXXBLAS_LEVEL2_HER2_TCC 1
35
36 #include <complex>
37 #include "xflens/cxxblas/cxxblas.h"
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename ALPHA, typename VX, typename VY,
42 typename MA>
43 void
her2_generic(StorageOrder order,StorageUpLo upLo,Transpose conjugateA,IndexType n,const ALPHA & alpha,const VX * x,IndexType incX,const VY * y,IndexType incY,MA * A,IndexType ldA)44 her2_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
45 IndexType n,
46 const ALPHA &alpha,
47 const VX *x, IndexType incX,
48 const VY *y, IndexType incY,
49 MA *A, IndexType ldA)
50 {
51 if (alpha==ALPHA(0)) {
52 return;
53 }
54 if (order==ColMajor) {
55 upLo = (upLo==Upper) ? Lower : Upper;
56 conjugateA = Transpose(conjugateA^Conj);
57 her2_generic(RowMajor, upLo, conjugateA,
58 n, alpha, x, incX, y, incY,
59 A, ldA);
60 return;
61 }
62 #ifdef CXXBLAS_USE_XERBLA
63 // insert error check here
64 #endif
65 if (upLo==Upper) {
66 if (conjugateA==Conj) {
67 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
68 axpy_generic(n-i, conjugate(alpha*x[iX]),
69 y+iY, incY,
70 A+i*(ldA+1), IndexType(1));
71 axpy_generic(n-i, alpha*conjugate(y[iY]),
72 x+iX, incX,
73 A+i*(ldA+1), IndexType(1));
74 }
75 } else {
76 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
77 acxpy_generic(n-i, alpha*x[iX],
78 y+iY, incY,
79 A+i*(ldA+1), IndexType(1));
80 acxpy_generic(n-i, conjugate(alpha)*y[iY],
81 x+iX, incX,
82 A+i*(ldA+1), IndexType(1));
83 }
84 }
85 } else {
86 if (conjugateA==Conj) {
87 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
88 axpy_generic(i+1, conjugate(alpha*x[iX]),
89 y, incY,
90 A+i*ldA, IndexType(1));
91 axpy_generic(i+1, alpha*conjugate(y[iY]),
92 x, incX,
93 A+i*ldA, IndexType(1));
94 }
95 } else {
96 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
97 acxpy_generic(i+1, alpha*x[iX],
98 y, incY,
99 A+i*ldA, IndexType(1));
100 acxpy_generic(i+1, conjugate(alpha)*y[iY],
101 x, incX,
102 A+i*ldA, IndexType(1));
103 }
104 }
105 }
106 for (IndexType i=0; i<n; ++i) {
107 A[i*ldA+i] = cxxblas::real(A[i*ldA+i]);
108 }
109 }
110
111 template <typename IndexType, typename ALPHA, typename VX, typename VY,
112 typename MA>
113 void
her2(StorageOrder order,StorageUpLo upLo,IndexType n,const ALPHA & alpha,const VX * x,IndexType incX,const VY * y,IndexType incY,MA * A,IndexType ldA)114 her2(StorageOrder order, StorageUpLo upLo,
115 IndexType n,
116 const ALPHA &alpha,
117 const VX *x, IndexType incX,
118 const VY *y, IndexType incY,
119 MA *A, IndexType ldA)
120 {
121 CXXBLAS_DEBUG_OUT("her2_generic");
122
123 if (incX<0) {
124 x -= incX*(n-1);
125 }
126 if (incY<0) {
127 y -= incY*(n-1);
128 }
129 her2_generic(order, upLo, NoTrans, n, alpha, x, incX, y, incY, A, ldA);
130 }
131
132
133 #ifdef HAVE_CBLAS
134
135 // cgerc
136 template <typename IndexType>
137 typename If<IndexType>::isBlasCompatibleInteger
her2(StorageOrder order,StorageUpLo upLo,IndexType n,const ComplexFloat & alpha,const ComplexFloat * x,IndexType incX,const ComplexFloat * y,IndexType incY,ComplexFloat * A,IndexType ldA)138 her2(StorageOrder order, StorageUpLo upLo,
139 IndexType n,
140 const ComplexFloat &alpha,
141 const ComplexFloat *x, IndexType incX,
142 const ComplexFloat *y, IndexType incY,
143 ComplexFloat *A, IndexType ldA)
144 {
145 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cher2");
146
147 cblas_cher2(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
148 n,
149 reinterpret_cast<const float *>(&alpha),
150 reinterpret_cast<const float *>(x), incX,
151 reinterpret_cast<const float *>(y), incY,
152 reinterpret_cast<float *>(A), ldA);
153 }
154
155 // zgerc
156 template <typename IndexType>
157 typename If<IndexType>::isBlasCompatibleInteger
her2(StorageOrder order,StorageUpLo upLo,IndexType n,const ComplexDouble & alpha,const ComplexDouble * x,IndexType incX,const ComplexDouble * y,IndexType incY,ComplexDouble * A,IndexType ldA)158 her2(StorageOrder order, StorageUpLo upLo,
159 IndexType n,
160 const ComplexDouble &alpha,
161 const ComplexDouble *x, IndexType incX,
162 const ComplexDouble *y, IndexType incY,
163 ComplexDouble *A, IndexType ldA)
164 {
165 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zher2");
166
167 cblas_zher2(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
168 n,
169 reinterpret_cast<const double *>(&alpha),
170 reinterpret_cast<const double *>(x), incX,
171 reinterpret_cast<const double *>(y), incY,
172 reinterpret_cast<double *>(A), ldA);
173 }
174
175 #endif // HAVE_CBLAS
176
177 } // namespace cxxblas
178
179 #endif // CXXBLAS_LEVEL2_HER2_TCC
180