1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory
8 //                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
9 //
10 // File created by: Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 
14 //           http://pathintegrals.info                     //
15 /////////////////////////////////////////////////////////////
16 
17 #ifndef VECTOR_OPS_H
18 #define VECTOR_OPS_H
19 
20 // extern "C"{
21 // #ifdef USE_MKL
22 //   #include <mkl_cblas.h>
23 // #else
24 //   #include <cblas.h>
25 // #endif
26 // }
27 #include "Blitz.h"
28 #include "../config.h"
29 
30 typedef TinyVector<int, 3> Int3;
31 typedef Array<std::complex<double>, 1> zVec;
32 typedef Array<cVec3, 1> zVecVec;
33 typedef Array<cMat3, 1> zMatVec;
34 
35 #ifdef USE_CBLAS
36 extern "C"
37 {
38 #ifdef USE_MKL_CBLAS
39 #include <mkl_cblas.h>
40 #else
41 #include <cblas.h>
42 #endif
43 }
44 #else
45 
46 #define F77_DZNRM2 F77_FUNC(dznrm2, DZNRM2)
47 #define F77_ZDSCAL F77_FUNC(zdscal, ZDSCAL)
48 #define F77_ZDOTC F77_FUNC(zdotc, ZDOTC)
49 #define F77_ZGEMV F77_FUNC(zgemv, ZGEMV)
50 #define F77_ZAXPY F77_FUNC(zaxpy, ZAXPY)
51 
52 extern "C" double F77_DZNRM2(const int* N, const void* X, const int* INC);
53 extern "C" void F77_ZDSCAL(const int* N, double* ALPHA, const void* X, const int* INC);
54 // extern "C" void F77_ZDOTC (std::complex<double> *z, const int *N,
55 // 			   const void *X, const int *INCX,
56 // 			   const void *Y, const int *INCY);
57 extern "C" std::complex<double> F77_ZDOTC(const int* N, const void* X, const int* INCX, const void* Y, const int* INCY);
58 extern "C" void F77_ZGEMV(char* TRANS,
59                           const int* M,
60                           const int* N,
61                           std::complex<double>* alpha,
62                           const void* A,
63                           const int* LDA,
64                           const void* X,
65                           const int* INCX,
66                           std::complex<double>* beta,
67                           const void* Y,
68                           const int* INCY);
69 
70 extern "C" void F77_ZAXPY(const int* N, std::complex<double>* ALPHA, void* X, int* INCX, void* Y, int* INCY);
71 #endif
72 
73 
74 #ifdef USE_CBLAS
Normalize(zVec & c)75 inline void Normalize(zVec& c)
76 {
77   double norm = cblas_dznrm2(c.size(), c.data(), 1);
78   norm        = 1.0 / norm;
79   cblas_zdscal(c.size(), norm, c.data(), 1);
80 }
81 
norm(const zVec & c)82 inline double norm(const zVec& c) { return cblas_dznrm2(c.size(), c.data(), 1); }
83 
conjdot(zVec & cA,zVec & cB)84 inline std::complex<double> conjdot(zVec& cA, zVec& cB)
85 {
86   std::complex<double> z;
87   cblas_zdotc_sub(cA.size(), cA.data(), 1, cB.data(), 1, &z);
88   return z;
89 }
90 
Orthogonalize(const Array<std::complex<double>,2> & A,zVec & x)91 inline void Orthogonalize(const Array<std::complex<double>, 2>& A, zVec& x)
92 {
93   int m = A.rows();
94   int n = A.cols();
95   assert(n == x.size());
96   std::complex<double> zero(0.0, 0.0);
97   std::complex<double> one(1.0, 0.0);
98   std::complex<double> minusone(-1.0, 0.0);
99   Array<std::complex<double>, 1> S(m);
100 
101 
102   cblas_zgemv(CblasColMajor, CblasConjTrans, n, m, &one, A.data(), n, x.data(), 1, &zero, S.data(), 1);
103   cblas_zgemv(CblasRowMajor, CblasTrans, m, n, &minusone, A.data(), n, S.data(), 1, &one, x.data(), 1);
104 }
105 
106 #else
107 
Normalize(zVec & c)108 inline void Normalize(zVec& c)
109 {
110   const int inc = 1;
111   int n         = c.size();
112   double norm   = F77_DZNRM2(&n, c.data(), &inc);
113   norm          = 1.0 / norm;
114   F77_ZDSCAL(&n, &norm, c.data(), &inc);
115 }
116 
117 
norm(const zVec & c)118 inline double norm(const zVec& c)
119 {
120   double norm;
121   int n         = c.size();
122   const int inc = 1;
123   return F77_DZNRM2(&n, c.data(), &inc);
124 }
125 
126 
conjdot(zVec & cA,zVec & cB)127 inline std::complex<double> conjdot(zVec& cA, zVec& cB)
128 {
129   const int n    = cA.size();
130   const int incA = 1;
131   const int incB = 1;
132   return F77_ZDOTC(&n, cA.data(), &incA, cB.data(), &incB);
133 
134   //   std::complex<double> z;
135   //   F77_ZDOTC (&z, &n, cA.data(), &incA, cB.data(), &incB);
136   //   return z;
137 }
138 
Orthogonalize(const Array<std::complex<double>,2> & A,zVec & x)139 inline void Orthogonalize(const Array<std::complex<double>, 2>& A, zVec& x)
140 {
141   int m = A.rows();
142   int n = A.cols();
143   assert(n == x.size());
144   std::complex<double> zero(0.0, 0.0);
145   std::complex<double> one(1.0, 0.0);
146   std::complex<double> minusone(-1.0, 0.0);
147   Array<std::complex<double>, 1> S(m);
148 
149   // Calculate overlaps
150   // Calling with column major and ConjTrans is equivalent to
151   // conjugate of untransposed row major
152   char trans    = 'C';
153   const int inc = 1;
154 
155   F77_ZGEMV(&trans, &n, &m, &one, A.data(), &n, x.data(), &inc, &zero, S.data(), &inc);
156 
157   //   cblas_zgemv(CblasColMajor, CblasConjTrans, n, m, &one,
158   // 	      A.data(), n, x.data(), 1, &zero, S.data(), 1);
159 
160   //   for (int i=0; i<m; i++) {
161   //     fprintf (stderr, "S[%d] = %18.14f + %18.14fi\n",
162   // 	     real(S[i]), imag(S[i]));
163 
164   // Now, subtract off components * overlaps
165   trans = 'T';
166   F77_ZGEMV(&trans, &m, &n, &minusone, A.data(), &n, S.data(), &inc, &one, x.data(), &inc);
167   //   cblas_zgemv(CblasRowMajor, CblasTrans, m, n, &minusone,
168   //  	      A.data(), n, S.data(), 1, &one, x.data(), 1);
169 }
170 
171 #endif
172 
realconjdot(zVec & cA,zVec & cB)173 inline double realconjdot(zVec& cA, zVec& cB) { return conjdot(cA, cB).real(); }
174 
175 
mag(std::complex<double> x)176 inline double mag(std::complex<double> x) { return (x.real() * x.real() + x.imag() * x.imag()); }
177 
Orthogonalize2(Array<std::complex<double>,2> & A,zVec & x,int lastBand)178 inline void Orthogonalize2(Array<std::complex<double>, 2>& A, zVec& x, int lastBand)
179 {
180   int m = A.rows();
181   int n = A.cols();
182   assert(n == x.size());
183   zVec Ar;
184   Array<std::complex<double>, 1> S(m);
185 
186   for (int row = 0; row <= lastBand; row++)
187   {
188     Ar.reference(A(row, Range::all()));
189     S(row) = conjdot(Ar, x);
190   }
191   for (int row = 0; row <= lastBand; row++)
192     x -= S(row) * A(row, Range::all());
193   //   for (int row=0; row<=lastBand; row++) {
194   //     Ar.reference (A(row,Range::all()));
195   //     S(row) = conjdot (Ar, x);
196   //     if (mag(S(row)) > 1.0e-14) {
197   //       std::cerr << "row = " << row << " lastband = " << lastBand << std::endl;
198   //       std::cerr << "Error in Orthogonalize2!, s = " << S(row) << std::endl;
199   //       double norm = realconjdot (Ar, Ar);
200   //       std::cerr << "norm = " << norm << std::endl;
201   //     }
202   //   }
203 }
204 
OrthogExcluding(const Array<std::complex<double>,2> & A,zVec & x,int excluding)205 inline void OrthogExcluding(const Array<std::complex<double>, 2>& A, zVec& x, int excluding)
206 {
207   int m = A.rows();
208   int n = A.cols();
209   assert(n == x.size());
210   zVec Ar;
211 
212 #ifndef __INTEL_COMPILER
213   std::complex<double> S[m];
214   for (int row = 0; row < m; row++)
215   {
216     Ar.reference(A(row, Range::all()));
217     S[row] = conjdot(Ar, x);
218   }
219   for (int row = 0; row < m; row++)
220     if (row != excluding)
221       x -= S[row] * A(row, Range::all());
222 #else
223 
224   double Sre[m], Sim[m];
225 
226   for (int row = 0; row < m; row++)
227   {
228     Ar.reference(A(row, Range::all()));
229     std::complex<double> S = conjdot(Ar, x);
230     Sre[row]               = real(S);
231     Sim[row]               = imag(S);
232   }
233   for (int row = 0; row < m; row++)
234     if (row != excluding)
235       x -= std::complex<double>(Sre[row], Sim[row]) * A(row, Range::all());
236 #endif
237 }
238 
239 
OrthogLower(const Array<std::complex<double>,2> & A,zVec & x,int currBand)240 inline void OrthogLower(const Array<std::complex<double>, 2>& A, zVec& x, int currBand)
241 {
242   int m = currBand;
243   int n = A.cols();
244   assert(n == x.size());
245   zVec Ar;
246   std::complex<double> S;
247 
248   for (int row = 0; row < m; row++)
249   {
250     Ar.reference(A(row, Range::all()));
251     S = conjdot(Ar, x);
252     x -= S * A(row, Range::all());
253   }
254 }
255 
256 
GramSchmidt(Array<std::complex<double>,2> & A)257 inline void GramSchmidt(Array<std::complex<double>, 2>& A)
258 {
259   zVec a, b;
260   for (int i = 0; i < A.rows(); i++)
261   {
262     a.reference(A(i, Range::all()));
263     Normalize(a);
264     for (int j = i + 1; j < A.rows(); j++)
265     {
266       b.reference(A(j, Range::all()));
267       b = b - (conjdot(a, b) * a);
268       Normalize(b);
269     }
270   }
271 }
272 
Orthogonalize(Array<std::complex<double>,2> & A)273 inline void Orthogonalize(Array<std::complex<double>, 2>& A)
274 {
275   zVec x, y;
276   Array<std::complex<double>, 1> S(A.rows());
277   for (int iter = 0; iter < 40; iter++)
278   {
279     for (int i = 0; i < A.rows(); i++)
280     {
281       x.reference(A(i, Range::all()));
282       for (int j = i + 1; j < A.rows(); j++)
283       {
284         y.reference(A(j, Range::all()));
285         S(j) = conjdot(y, x);
286       }
287       for (int j = i + 1; j < A.rows(); j++)
288       {
289         y.reference(A(j, Range::all()));
290         x -= S(j) * y;
291       }
292       Normalize(x);
293     }
294   }
295 }
296 
297 
CheckOrthog(const Array<std::complex<double>,2> & A,zVec & x)298 inline void CheckOrthog(const Array<std::complex<double>, 2>& A, zVec& x)
299 {
300   zVec Ai;
301   double normInv = 1.0 / norm(x);
302   for (int i = 0; i < A.rows(); i++)
303   {
304     Ai.reference(A(i, Range::all()));
305     if (normInv * mag(conjdot(Ai, x)) > 1.0e-13)
306     {
307       std::cerr << "CheckOrthog failed for i=" << i << ".\n";
308       exit(1);
309     }
310   }
311 }
312 
zaxpy(std::complex<double> alpha,const Array<std::complex<double>,1> & x,const std::complex<double> y,Array<std::complex<double>,1> & axpy)313 inline void zaxpy(std::complex<double> alpha,
314                   const Array<std::complex<double>, 1>& x,
315                   const std::complex<double> y,
316                   Array<std::complex<double>, 1>& axpy)
317 {}
318 
319 
320 // inline Array<std::complex<double>,3>&
321 // operator*= (Array<std::complex<double>,3> &A, const Array<std::complex<double>,3> &B)
322 // {
323 
324 
325 // }
326 
327 
328 #endif
329