1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory
8 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
9 //
10 // File created by: Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory
11 //////////////////////////////////////////////////////////////////////////////////////
12
13
14 // http://pathintegrals.info //
15 /////////////////////////////////////////////////////////////
16
17 #ifndef VECTOR_OPS_H
18 #define VECTOR_OPS_H
19
20 // extern "C"{
21 // #ifdef USE_MKL
22 // #include <mkl_cblas.h>
23 // #else
24 // #include <cblas.h>
25 // #endif
26 // }
27 #include "Blitz.h"
28 #include "../config.h"
29
30 typedef TinyVector<int, 3> Int3;
31 typedef Array<std::complex<double>, 1> zVec;
32 typedef Array<cVec3, 1> zVecVec;
33 typedef Array<cMat3, 1> zMatVec;
34
35 #ifdef USE_CBLAS
36 extern "C"
37 {
38 #ifdef USE_MKL_CBLAS
39 #include <mkl_cblas.h>
40 #else
41 #include <cblas.h>
42 #endif
43 }
44 #else
45
46 #define F77_DZNRM2 F77_FUNC(dznrm2, DZNRM2)
47 #define F77_ZDSCAL F77_FUNC(zdscal, ZDSCAL)
48 #define F77_ZDOTC F77_FUNC(zdotc, ZDOTC)
49 #define F77_ZGEMV F77_FUNC(zgemv, ZGEMV)
50 #define F77_ZAXPY F77_FUNC(zaxpy, ZAXPY)
51
52 extern "C" double F77_DZNRM2(const int* N, const void* X, const int* INC);
53 extern "C" void F77_ZDSCAL(const int* N, double* ALPHA, const void* X, const int* INC);
54 // extern "C" void F77_ZDOTC (std::complex<double> *z, const int *N,
55 // const void *X, const int *INCX,
56 // const void *Y, const int *INCY);
57 extern "C" std::complex<double> F77_ZDOTC(const int* N, const void* X, const int* INCX, const void* Y, const int* INCY);
58 extern "C" void F77_ZGEMV(char* TRANS,
59 const int* M,
60 const int* N,
61 std::complex<double>* alpha,
62 const void* A,
63 const int* LDA,
64 const void* X,
65 const int* INCX,
66 std::complex<double>* beta,
67 const void* Y,
68 const int* INCY);
69
70 extern "C" void F77_ZAXPY(const int* N, std::complex<double>* ALPHA, void* X, int* INCX, void* Y, int* INCY);
71 #endif
72
73
74 #ifdef USE_CBLAS
Normalize(zVec & c)75 inline void Normalize(zVec& c)
76 {
77 double norm = cblas_dznrm2(c.size(), c.data(), 1);
78 norm = 1.0 / norm;
79 cblas_zdscal(c.size(), norm, c.data(), 1);
80 }
81
norm(const zVec & c)82 inline double norm(const zVec& c) { return cblas_dznrm2(c.size(), c.data(), 1); }
83
conjdot(zVec & cA,zVec & cB)84 inline std::complex<double> conjdot(zVec& cA, zVec& cB)
85 {
86 std::complex<double> z;
87 cblas_zdotc_sub(cA.size(), cA.data(), 1, cB.data(), 1, &z);
88 return z;
89 }
90
Orthogonalize(const Array<std::complex<double>,2> & A,zVec & x)91 inline void Orthogonalize(const Array<std::complex<double>, 2>& A, zVec& x)
92 {
93 int m = A.rows();
94 int n = A.cols();
95 assert(n == x.size());
96 std::complex<double> zero(0.0, 0.0);
97 std::complex<double> one(1.0, 0.0);
98 std::complex<double> minusone(-1.0, 0.0);
99 Array<std::complex<double>, 1> S(m);
100
101
102 cblas_zgemv(CblasColMajor, CblasConjTrans, n, m, &one, A.data(), n, x.data(), 1, &zero, S.data(), 1);
103 cblas_zgemv(CblasRowMajor, CblasTrans, m, n, &minusone, A.data(), n, S.data(), 1, &one, x.data(), 1);
104 }
105
106 #else
107
Normalize(zVec & c)108 inline void Normalize(zVec& c)
109 {
110 const int inc = 1;
111 int n = c.size();
112 double norm = F77_DZNRM2(&n, c.data(), &inc);
113 norm = 1.0 / norm;
114 F77_ZDSCAL(&n, &norm, c.data(), &inc);
115 }
116
117
norm(const zVec & c)118 inline double norm(const zVec& c)
119 {
120 double norm;
121 int n = c.size();
122 const int inc = 1;
123 return F77_DZNRM2(&n, c.data(), &inc);
124 }
125
126
conjdot(zVec & cA,zVec & cB)127 inline std::complex<double> conjdot(zVec& cA, zVec& cB)
128 {
129 const int n = cA.size();
130 const int incA = 1;
131 const int incB = 1;
132 return F77_ZDOTC(&n, cA.data(), &incA, cB.data(), &incB);
133
134 // std::complex<double> z;
135 // F77_ZDOTC (&z, &n, cA.data(), &incA, cB.data(), &incB);
136 // return z;
137 }
138
Orthogonalize(const Array<std::complex<double>,2> & A,zVec & x)139 inline void Orthogonalize(const Array<std::complex<double>, 2>& A, zVec& x)
140 {
141 int m = A.rows();
142 int n = A.cols();
143 assert(n == x.size());
144 std::complex<double> zero(0.0, 0.0);
145 std::complex<double> one(1.0, 0.0);
146 std::complex<double> minusone(-1.0, 0.0);
147 Array<std::complex<double>, 1> S(m);
148
149 // Calculate overlaps
150 // Calling with column major and ConjTrans is equivalent to
151 // conjugate of untransposed row major
152 char trans = 'C';
153 const int inc = 1;
154
155 F77_ZGEMV(&trans, &n, &m, &one, A.data(), &n, x.data(), &inc, &zero, S.data(), &inc);
156
157 // cblas_zgemv(CblasColMajor, CblasConjTrans, n, m, &one,
158 // A.data(), n, x.data(), 1, &zero, S.data(), 1);
159
160 // for (int i=0; i<m; i++) {
161 // fprintf (stderr, "S[%d] = %18.14f + %18.14fi\n",
162 // real(S[i]), imag(S[i]));
163
164 // Now, subtract off components * overlaps
165 trans = 'T';
166 F77_ZGEMV(&trans, &m, &n, &minusone, A.data(), &n, S.data(), &inc, &one, x.data(), &inc);
167 // cblas_zgemv(CblasRowMajor, CblasTrans, m, n, &minusone,
168 // A.data(), n, S.data(), 1, &one, x.data(), 1);
169 }
170
171 #endif
172
realconjdot(zVec & cA,zVec & cB)173 inline double realconjdot(zVec& cA, zVec& cB) { return conjdot(cA, cB).real(); }
174
175
mag(std::complex<double> x)176 inline double mag(std::complex<double> x) { return (x.real() * x.real() + x.imag() * x.imag()); }
177
Orthogonalize2(Array<std::complex<double>,2> & A,zVec & x,int lastBand)178 inline void Orthogonalize2(Array<std::complex<double>, 2>& A, zVec& x, int lastBand)
179 {
180 int m = A.rows();
181 int n = A.cols();
182 assert(n == x.size());
183 zVec Ar;
184 Array<std::complex<double>, 1> S(m);
185
186 for (int row = 0; row <= lastBand; row++)
187 {
188 Ar.reference(A(row, Range::all()));
189 S(row) = conjdot(Ar, x);
190 }
191 for (int row = 0; row <= lastBand; row++)
192 x -= S(row) * A(row, Range::all());
193 // for (int row=0; row<=lastBand; row++) {
194 // Ar.reference (A(row,Range::all()));
195 // S(row) = conjdot (Ar, x);
196 // if (mag(S(row)) > 1.0e-14) {
197 // std::cerr << "row = " << row << " lastband = " << lastBand << std::endl;
198 // std::cerr << "Error in Orthogonalize2!, s = " << S(row) << std::endl;
199 // double norm = realconjdot (Ar, Ar);
200 // std::cerr << "norm = " << norm << std::endl;
201 // }
202 // }
203 }
204
OrthogExcluding(const Array<std::complex<double>,2> & A,zVec & x,int excluding)205 inline void OrthogExcluding(const Array<std::complex<double>, 2>& A, zVec& x, int excluding)
206 {
207 int m = A.rows();
208 int n = A.cols();
209 assert(n == x.size());
210 zVec Ar;
211
212 #ifndef __INTEL_COMPILER
213 std::complex<double> S[m];
214 for (int row = 0; row < m; row++)
215 {
216 Ar.reference(A(row, Range::all()));
217 S[row] = conjdot(Ar, x);
218 }
219 for (int row = 0; row < m; row++)
220 if (row != excluding)
221 x -= S[row] * A(row, Range::all());
222 #else
223
224 double Sre[m], Sim[m];
225
226 for (int row = 0; row < m; row++)
227 {
228 Ar.reference(A(row, Range::all()));
229 std::complex<double> S = conjdot(Ar, x);
230 Sre[row] = real(S);
231 Sim[row] = imag(S);
232 }
233 for (int row = 0; row < m; row++)
234 if (row != excluding)
235 x -= std::complex<double>(Sre[row], Sim[row]) * A(row, Range::all());
236 #endif
237 }
238
239
OrthogLower(const Array<std::complex<double>,2> & A,zVec & x,int currBand)240 inline void OrthogLower(const Array<std::complex<double>, 2>& A, zVec& x, int currBand)
241 {
242 int m = currBand;
243 int n = A.cols();
244 assert(n == x.size());
245 zVec Ar;
246 std::complex<double> S;
247
248 for (int row = 0; row < m; row++)
249 {
250 Ar.reference(A(row, Range::all()));
251 S = conjdot(Ar, x);
252 x -= S * A(row, Range::all());
253 }
254 }
255
256
GramSchmidt(Array<std::complex<double>,2> & A)257 inline void GramSchmidt(Array<std::complex<double>, 2>& A)
258 {
259 zVec a, b;
260 for (int i = 0; i < A.rows(); i++)
261 {
262 a.reference(A(i, Range::all()));
263 Normalize(a);
264 for (int j = i + 1; j < A.rows(); j++)
265 {
266 b.reference(A(j, Range::all()));
267 b = b - (conjdot(a, b) * a);
268 Normalize(b);
269 }
270 }
271 }
272
Orthogonalize(Array<std::complex<double>,2> & A)273 inline void Orthogonalize(Array<std::complex<double>, 2>& A)
274 {
275 zVec x, y;
276 Array<std::complex<double>, 1> S(A.rows());
277 for (int iter = 0; iter < 40; iter++)
278 {
279 for (int i = 0; i < A.rows(); i++)
280 {
281 x.reference(A(i, Range::all()));
282 for (int j = i + 1; j < A.rows(); j++)
283 {
284 y.reference(A(j, Range::all()));
285 S(j) = conjdot(y, x);
286 }
287 for (int j = i + 1; j < A.rows(); j++)
288 {
289 y.reference(A(j, Range::all()));
290 x -= S(j) * y;
291 }
292 Normalize(x);
293 }
294 }
295 }
296
297
CheckOrthog(const Array<std::complex<double>,2> & A,zVec & x)298 inline void CheckOrthog(const Array<std::complex<double>, 2>& A, zVec& x)
299 {
300 zVec Ai;
301 double normInv = 1.0 / norm(x);
302 for (int i = 0; i < A.rows(); i++)
303 {
304 Ai.reference(A(i, Range::all()));
305 if (normInv * mag(conjdot(Ai, x)) > 1.0e-13)
306 {
307 std::cerr << "CheckOrthog failed for i=" << i << ".\n";
308 exit(1);
309 }
310 }
311 }
312
zaxpy(std::complex<double> alpha,const Array<std::complex<double>,1> & x,const std::complex<double> y,Array<std::complex<double>,1> & axpy)313 inline void zaxpy(std::complex<double> alpha,
314 const Array<std::complex<double>, 1>& x,
315 const std::complex<double> y,
316 Array<std::complex<double>, 1>& axpy)
317 {}
318
319
320 // inline Array<std::complex<double>,3>&
321 // operator*= (Array<std::complex<double>,3> &A, const Array<std::complex<double>,3> &B)
322 // {
323
324
325 // }
326
327
328 #endif
329