1 /*  Copyright (C) 2003-2007  CAMP
2  *  Copyright (C) 2007-2009  CAMd
3  *  Copyright (C) 2010  Argonne National Laboratory
4  *  Please see the accompanying LICENSE file for further information. */
5 
6 #ifdef PARALLEL
7 #include <Python.h>
8 #ifdef GPAW_WITH_SL
9 #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
10 #define NO_IMPORT_ARRAY
11 #include <numpy/arrayobject.h>
12 #include <stdlib.h>
13 #include <mpi.h>
14 #include <structmember.h>
15 #include "extensions.h"
16 #include "mympi.h"
17 
18 // BLACS
19 #define BLOCK_CYCLIC_2D 1
20 
21 #ifdef GPAW_NO_UNDERSCORE_CBLACS
22 #define Cblacs_barrier_    Cblacs_barrier
23 #define Cblacs_gridexit_   Cblacs_gridexit
24 #define Cblacs_gridinfo_   Cblacs_gridinfo
25 #define Cblacs_gridinit_   Cblacs_gridinit
26 #define Cblacs_pinfo_      Cblacs_pinfo
27 #define Csys2blacs_handle_ Csys2blacs_handle
28 #endif
29 
30 void Cblacs_barrier_(int ConTxt, char *scope);
31 
32 void Cblacs_gridexit_(int ConTxt);
33 
34 void Cblacs_gridinfo_(int ConTxt, int* nprow, int* npcol,
35                       int* myrow, int* mycol);
36 
37 void Cblacs_gridinit_(int* ConTxt, char* order, int nprow, int npcol);
38 
39 void Cblacs_pinfo_(int* mypnum, int* nprocs);
40 
41 int Csys2blacs_handle_(MPI_Comm SysCtxt);
42 // End of BLACS
43 
44 // ScaLAPACK
45 #ifdef GPAW_NO_UNDERSCORE_SCALAPACK
46 #define   numroc_  numroc
47 #define   pdlamch_ pdlamch
48 #define   pdlaset_ pdlaset
49 #define   pzlaset_ pzlaset
50 
51 #define   pdpotrf_ pdpotrf
52 #define   pzpotrf_ pzpotrf
53 #define   pzpotri_ pzpotri
54 #define   pdtrtri_ pdtrtri
55 #define   pztrtri_ pztrtri
56 
57 #define   pzgesv_ pzgesv
58 #define   pdgesv_ pdgesv
59 
60 #define   pdsyevd_  pdsyevd
61 #define   pzheevd_  pzheevd
62 #define   pdsyevx_  pdsyevx
63 #define   pzheevx_  pzheevx
64 #define   pdsygvx_  pdsygvx
65 #define   pzhegvx_  pzhegvx
66 #define   pdsyngst_ pdsyngst
67 #define   pzhengst_ pzhengst
68 #ifdef GPAW_MR3
69 #define   pdsyevr_  pdsyevr
70 #define   pzheevr_  pzheevr
71 #endif // GPAW_MR3
72 
73 #define   pdtran_  pdtran
74 #define   pztranc_ pztranc
75 #define   pztranu_ pztranu
76 #define   pdgemm_  pdgemm
77 #define   pzgemm_  pzgemm
78 #define   pdgemv_  pdgemv
79 #define   pzgemv_  pzgemv
80 #define   pdsyr2k_ pdsyr2k
81 #define   pzher2k_ pzher2k
82 #define   pdsyrk_  pdsyrk
83 #define   pzherk_  pzherk
84 #define   pdtrsm_  pdtrsm
85 #define   pztrsm_  pztrsm
86 
87 #define   pzhemm_  pzhemm
88 #define   pzsymm_  pzsymm
89 #define   pdsymm_  pdsymm
90 
91 #endif
92 
93 #ifdef GPAW_NO_UNDERSCORE_CSCALAPACK
94 #define   Cpdgemr2d_  Cpdgemr2d
95 #define   Cpzgemr2d_  Cpzgemr2d
96 #define   Cpdtrmr2d_  Cpdtrmr2d
97 #define   Cpztrmr2d_  Cpztrmr2d
98 #endif
99 
100 // tools
101 int numroc_(int* n, int* nb, int* iproc, int* isrcproc, int* nprocs);
102 
103 void Cpdgemr2d_(int m, int n,
104                 double* a, int ia, int ja, int* desca,
105                 double* b, int ib, int jb, int* descb,
106                 int gcontext);
107 
108 void Cpzgemr2d_(int m, int n,
109                 void* a, int ia, int ja, int* desca,
110                 void* b, int ib, int jb, int* descb,
111                 int gcontext);
112 
113 void Cpdtrmr2d_(char* uplo, char* diag, int m, int n,
114                 double* a, int ia, int ja, int* desca,
115                 double* b, int ib, int jb, int* descb,
116                 int gcontext);
117 
118 void Cpztrmr2d_(char* uplo, char* diag, int m, int n,
119                 void* a, int ia, int ja, int* desca,
120                 void* b, int ib, int jb, int* descb,
121                 int gcontext);
122 
123 double pdlamch_(int* ictxt, char* cmach);
124 
125 void pzpotri_(char* uplo, int* n, void* a, int *ia, int* ja, int* desca, int* info);
126 
127 void pzgetri_(int* n, void* a,
128               int *ia, int* ja, int* desca, int* info);
129 
130 void pdlaset_(char* uplo, int* m, int* n, double* alpha, double* beta,
131               double* a, int* ia, int* ja, int* desca);
132 
133 void pzlaset_(char* uplo, int* m, int* n, void* alpha, void* beta,
134               void* a, int* ia, int* ja, int* desca);
135 
136 // cholesky
137 void pdpotrf_(char* uplo, int* n, double* a,
138               int* ia, int* ja, int* desca, int* info);
139 
140 void pzpotrf_(char* uplo, int* n, void* a,
141               int* ia, int* ja, int* desca, int* info);
142 
143 void pzgesv_(int* n, int* nrhs, void* a,
144              int* ia, int* ja, int* desca, int* ipiv,
145              void* b, int* ib, int* jb, int* descb, int* info);
146 
147 void pdgesv_(int *n, int *nrhs, void *a,
148              int *ia, int *ja, int* desca, int *ipiv,
149              void* b, int* ib, int* jb, int* descb, int* info);
150 
151 
152 void pdtrtri_(char* uplo, char* diag, int* n, double* a,
153               int *ia, int* ja, int* desca, int* info);
154 
155 void pztrtri_(char* uplo, char* diag, int* n, void* a,
156               int *ia, int* ja, int* desca, int* info);
157 
158 // diagonalization
159 void pdsyevd_(char* jobz, char* uplo, int* n,
160               double* a, int* ia, int* ja, int* desca,
161               double* w, double* z, int* iz, int* jz,
162               int* descz, double* work, int* lwork, int* iwork,
163               int* liwork, int* info);
164 
165 void pzheevd_(char* jobz, char* uplo, int* n,
166               void* a, int* ia, int* ja, int* desca,
167               double* w, void* z, int* iz, int* jz,
168               int* descz, void* work, int* lwork, double* rwork,
169               int* lrwork, int* iwork, int* liwork, int* info);
170 
171 void pdsyevx_(char* jobz, char* range,
172               char* uplo, int* n,
173               double* a, int* ia, int* ja, int* desca,
174               double* vl, double* vu,
175               int* il, int* iu, double* abstol,
176               int* m, int* nz, double* w, double* orfac,
177               double* z, int* iz, int* jz, int* descz,
178               double* work, int* lwork, int* iwork, int* liwork,
179               int* ifail, int* iclustr, double* gap, int* info);
180 
181 void pzheevx_(char* jobz, char* range,
182               char* uplo, int* n,
183               void* a, int* ia, int* ja, int* desca,
184               double* vl, double* vu,
185               int* il, int* iu, double* abstol,
186               int* m, int* nz,  double* w, double* orfac,
187               void* z, int* iz, int* jz, int* descz,
188               void* work, int* lwork, double* rwork, int* lrwork,
189               int* iwork, int* liwork,
190               int* ifail, int* iclustr, double* gap, int* info);
191 
192 void pdsygvx_(int* ibtype, char* jobz, char* range,
193               char* uplo, int* n,
194               double* a, int* ia, int* ja, int* desca,
195               double* b, int *ib, int* jb, int* descb,
196               double* vl, double* vu,
197               int* il, int* iu, double* abstol,
198               int* m, int* nz, double* w, double* orfac,
199               double* z, int* iz, int* jz, int* descz,
200               double* work, int* lwork, int* iwork, int* liwork,
201               int* ifail, int* iclustr, double* gap, int* info);
202 
203 void pzhegvx_(int* ibtype, char* jobz, char* range,
204               char* uplo, int* n,
205               void* a, int* ia, int* ja, int* desca,
206               void* b, int *ib, int* jb, int* descb,
207               double* vl, double* vu,
208               int* il, int* iu, double* abstol,
209               int* m, int* nz,  double* w, double* orfac,
210               void* z, int* iz, int* jz, int* descz,
211               void* work, int* lwork, double* rwork, int* lrwork,
212               int* iwork, int* liwork,
213               int* ifail, int* iclustr, double* gap, int* info);
214 
215 void pdsyngst_(int* ibtype, char* uplo, int* n,
216                double* a, int* ia, int* ja, int* desca,
217                double* b, int* ib, int* jb, int* descb,
218                double* scale, double* work, int* lwork, int* info);
219 
220 void pzhengst_(int* ibtype, char* uplo, int* n,
221                void* a, int* ia, int* ja, int* desca,
222                void* b, int* ib, int* jb, int* descb,
223                double* scale, void* work, int* lwork, int* info);
224 
225 #ifdef GPAW_MR3
226 void pdsyevr_(char* jobz, char* range,
227               char* uplo, int* n,
228               double* a, int* ia, int* ja, int* desca,
229               double* vl, double* vu,
230               int* il, int* iu,
231               int* m, int* nz, double* w,
232               double* z, int* iz, int* jz, int* descz,
233               double* work, int* lwork, int* iwork, int* liwork,
234               int* info);
235 
236 void pzheevr_(char* jobz, char* range,
237               char* uplo, int* n,
238               void* a, int* ia, int* ja, int* desca,
239               double* vl, double* vu,
240               int* il, int* iu,
241               int* m, int* nz,  double* w,
242               void* z, int* iz, int* jz, int* descz,
243               void* work, int* lwork, double* rwork, int* lrwork,
244               int* iwork, int* liwork,
245               int* info);
246 #endif // GPAW_MR3
247 
248 // pblas
249 void pdtran_(int* m, int* n,
250              double* alpha,
251              double* a, int* ia, int* ja, int* desca,
252              double* beta,
253              double* c, int* ic, int* jc, int* descc);
254 
255 void pztranc_(int* m, int* n,
256               void* alpha,
257               void* a, int* ia, int* ja, int* desca,
258               void* beta,
259               void* c, int* ic, int* jc, int* descc);
260 
261 void pztranu_(int* m, int* n,
262               void* alpha,
263               void* a, int* ia, int* ja, int* desca,
264               void* beta,
265               void* c, int* ic, int* jc, int* descc);
266 
267 void pdgemm_(char* transa, char* transb, int* m, int* n, int* k,
268              double* alpha,
269              double* a, int* ia, int* ja, int* desca,
270              double* b, int* ib, int* jb, int* descb,
271              double* beta,
272              double* c, int* ic, int* jc, int* descc);
273 
274 void pzgemm_(char* transa, char* transb, int* m, int* n, int* k,
275              void* alpha,
276              void* a, int* ia, int* ja, int* desca,
277              void* b, int* ib, int* jb, int* descb,
278              void* beta,
279              void* c, int* ic, int* jc, int* descc);
280 
281 void pzhemm_(char* side, char* uplo, int* m, int* n,
282              void* alpha,
283              void* a, int* ia, int* ja, int* desca,
284              void* b, int* ib, int* jb, int* descb,
285              void* beta,
286              void* c, int* ic, int* jc, int* descc);
287 
288 void pzsymm_(char* side, char* uplo, int* m, int* n,
289              void* alpha,
290              void* a, int* ia, int* ja, int* desca,
291              void* b, int* ib, int* jb, int* descb,
292              void* beta,
293              void* c, int* ic, int* jc, int* descc);
294 
295 void pdsymm_(char* side, char* uplo, int* m, int* n,
296              void* alpha,
297              void* a, int* ia, int* ja, int* desca,
298              void* b, int* ib, int* jb, int* descb,
299              void* beta,
300              void* c, int* ic, int* jc, int* descc);
301 
302 void pdgemv_(char* transa, int* m, int* n, double* alpha,
303              double* a, int* ia, int* ja, int* desca,
304              double* x, int* ix, int* jx, int* descx, int* incx,
305              double* beta,
306              double* y, int* iy, int* jy, int* descy, int* incy);
307 
308 void pzgemv_(char* transa, int* m, int* n, void* alpha,
309              void* a, int* ia, int* ja, int* desca,
310              void* x, int* ix, int* jx, int* descx, int* incx,
311              void* beta,
312              void* y, int* iy, int* jy, int* descy, int* incy);
313 
314 void pdsyr2k_(char* uplo, char* trans, int* n, int* k,
315               double* alpha,
316               double* a, int* ia, int* ja, int* desca,
317               double* b, int* ib, int* jb, int* descb,
318               double* beta,
319               double* c, int* ic, int *jc, int* descc);
320 
321 void pzher2k_(char* uplo, char* trans, int* n, int* k,
322               void* alpha,
323               void* a, int* ia, int* ja, int* desca,
324               void* b, int* ib, int* jb, int* descb,
325               void* beta,
326               void* c, int* ic, int* jc, int* descc);
327 
328 void pdsyrk_(char* uplo, char* trans, int* n, int* k,
329              double* alpha,
330              double* a, int* ia, int* ja, int* desca,
331              double* beta,
332              double* c, int* ic, int* jc, int* descc);
333 
334 void pzherk_(char* uplo, char* trans, int* n, int* k,
335              void* alpha,
336              void* a, int* ia, int* ja, int* desca,
337              void* beta,
338              void* c, int* ic, int* jc, int* descc);
339 
340 void pdtrsm_(char* side, char* uplo, char* trans, char* diag,
341              int* m, int *n, double* alpha,
342              double* a, int* ia, int* ja, int* desca,
343              double* b, int* ib, int* jb, int* descb);
344 
345 void pztrsm_(char* side, char* uplo, char* trans, char* diag,
346              int* m, int *n, void* alpha,
347              void* a, int* ia, int* ja, int* desca,
348              void* b, int* ib, int* jb, int* descb);
349 
350 
pblas_tran(PyObject * self,PyObject * args)351 PyObject* pblas_tran(PyObject *self, PyObject *args)
352 {
353     int m, n;
354     Py_complex alpha;
355     Py_complex beta;
356     PyArrayObject *a, *c;
357     PyArrayObject *desca, *descc;
358     int conj;
359 
360     if (!PyArg_ParseTuple(args, "iiDODOOOi", &m, &n, &alpha,
361                           &a, &beta, &c,
362                           &desca, &descc,
363                           &conj))
364         return NULL;
365 
366     int one = 1;
367     if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
368         pdtran_(&m, &n,
369                 &(alpha.real),
370                 DOUBLEP(a), &one, &one, INTP(desca),
371                 &(beta.real),
372                 DOUBLEP(c), &one, &one, INTP(descc));
373     else if (conj)
374         pztranc_(&m, &n,
375                  &alpha,
376                  (void*)PyArray_DATA(a), &one, &one, INTP(desca),
377                  &beta,
378                  (void*)PyArray_DATA(c), &one, &one, INTP(descc));
379     else
380         pztranu_(&m, &n,
381                  &alpha,
382                  (void*)PyArray_DATA(a), &one, &one, INTP(desca),
383                  &beta,
384                  (void*)PyArray_DATA(c), &one, &one, INTP(descc));
385     Py_RETURN_NONE;
386 }
387 
pblas_gemm(PyObject * self,PyObject * args)388 PyObject* pblas_gemm(PyObject *self, PyObject *args)
389 {
390   char* transa;
391   char* transb;
392   int m, n, k;
393   Py_complex alpha;
394   Py_complex beta;
395   PyArrayObject *a, *b, *c;
396   PyArrayObject *desca, *descb, *descc;
397   int one = 1;
398 
399   if (!PyArg_ParseTuple(args, "iiiDOODOOOOss", &m, &n, &k, &alpha,
400                         &a, &b, &beta, &c,
401                         &desca, &descb, &descc,
402                         &transa, &transb)) {
403     return NULL;
404   }
405 
406   // cdesc
407   // int c_ConTxt = INTP(descc)[1];
408 
409   // If process not on BLACS grid, then return.
410   // if (c_ConTxt == -1) Py_RETURN_NONE;
411 
412   if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
413     pdgemm_(transa, transb, &m, &n, &k,
414             &(alpha.real),
415             DOUBLEP(a), &one, &one, INTP(desca),
416             DOUBLEP(b), &one, &one, INTP(descb),
417             &(beta.real),
418             DOUBLEP(c), &one, &one, INTP(descc));
419   else
420     pzgemm_(transa, transb, &m, &n, &k,
421             &alpha,
422             (void*)COMPLEXP(a), &one, &one, INTP(desca),
423             (void*)COMPLEXP(b), &one, &one, INTP(descb),
424             &beta,
425             (void*)COMPLEXP(c), &one, &one, INTP(descc));
426 
427   Py_RETURN_NONE;
428 }
429 
430 
pblas_hemm_symm(PyObject * self,PyObject * args)431 PyObject* pblas_hemm_symm(PyObject *self, PyObject *args)
432 {
433   char* side;
434   char* uplo;
435   int m, n;
436   Py_complex alpha;
437   Py_complex beta;
438   PyArrayObject *a, *b, *c;
439   PyArrayObject *desca, *descb, *descc;
440   int hemm;
441   int one = 1;
442   if (!PyArg_ParseTuple(args, "ssiiDOODOOOOi",
443                  &side, &uplo, &n, &m,
444                  &alpha, &a, &b, &beta,
445                  &c, &desca, &descb, &descc,
446                  &hemm)) {
447     return NULL;
448   }
449 
450   if (PyArray_DESCR(c)->type_num == NPY_DOUBLE) {
451      pdsymm_(side, uplo, &n, &m, &(alpha.real),
452              (void*)DOUBLEP(a), &one, &one, INTP(desca),
453              (void*)DOUBLEP(b), &one, &one, INTP(descb),
454              &(beta.real),
455              (void*)DOUBLEP(c), &one, &one, INTP(descc));
456   } else if (hemm) {
457      pzhemm_(side, uplo, &n, &m, &alpha,
458              (void*)COMPLEXP(a), &one, &one, INTP(desca),
459              (void*)COMPLEXP(b), &one, &one, INTP(descb),
460              &beta,
461              (void*)COMPLEXP(c), &one, &one, INTP(descc));
462   } else {
463      pzsymm_(side, uplo, &n, &m, &alpha,
464              (void*)COMPLEXP(a), &one, &one, INTP(desca),
465              (void*)COMPLEXP(b), &one, &one, INTP(descb),
466              &beta,
467              (void*)COMPLEXP(c), &one, &one, INTP(descc));
468   }
469 
470  Py_RETURN_NONE;
471 }
472 
pblas_gemv(PyObject * self,PyObject * args)473 PyObject* pblas_gemv(PyObject *self, PyObject *args)
474 {
475   char* transa;
476   int m, n;
477   Py_complex alpha;
478   Py_complex beta;
479   PyArrayObject *a, *x, *y;
480   int incx = 1, incy = 1; // what should these be?
481   PyArrayObject *desca, *descx, *descy;
482   int one = 1;
483   if (!PyArg_ParseTuple(args, "iiDOODOOOOs",
484                         &m, &n, &alpha,
485                         &a, &x, &beta, &y,
486                         &desca, &descx,
487                         &descy, &transa)) {
488     return NULL;
489   }
490 
491   // ydesc
492   // int y_ConTxt = INTP(descy)[1];
493 
494   // If process not on BLACS grid, then return.
495   // if (y_ConTxt == -1) Py_RETURN_NONE;
496 
497   if (PyArray_DESCR(y)->type_num == NPY_DOUBLE)
498     pdgemv_(transa, &m, &n,
499             &(alpha.real),
500             DOUBLEP(a), &one, &one, INTP(desca),
501             DOUBLEP(x), &one, &one, INTP(descx), &incx,
502             &(beta.real),
503             DOUBLEP(y), &one, &one, INTP(descy), &incy);
504   else
505     pzgemv_(transa, &m, &n,
506             &alpha,
507             (void*)COMPLEXP(a), &one, &one, INTP(desca),
508             (void*)COMPLEXP(x), &one, &one, INTP(descx), &incx,
509             &beta,
510             (void*)COMPLEXP(y), &one, &one, INTP(descy), &incy);
511 
512   Py_RETURN_NONE;
513 }
514 
pblas_r2k(PyObject * self,PyObject * args)515 PyObject* pblas_r2k(PyObject *self, PyObject *args)
516 {
517   char* uplo;
518   int n, k;
519   Py_complex alpha;
520   Py_complex beta;
521   PyArrayObject *a, *b, *c;
522   PyArrayObject *desca, *descb, *descc;
523   int one = 1;
524 
525   if (!PyArg_ParseTuple(args, "iiDOODOOOOs", &n, &k, &alpha,
526                         &a, &b, &beta, &c,
527                         &desca, &descb, &descc,
528                         &uplo)) {
529     return NULL;
530   }
531 
532   // cdesc
533   // int c_ConTxt = INTP(descc)[1];
534 
535   // If process not on BLACS grid, then return.
536   // if (c_ConTxt == -1) Py_RETURN_NONE;
537 
538   if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
539     pdsyr2k_(uplo, "T", &n, &k,
540              &(alpha.real),
541              DOUBLEP(a), &one, &one, INTP(desca),
542              DOUBLEP(b), &one, &one, INTP(descb),
543              &(beta.real),
544              DOUBLEP(c), &one, &one, INTP(descc));
545   else
546     pzher2k_(uplo, "C", &n, &k,
547              &alpha,
548              (void*)COMPLEXP(a), &one, &one, INTP(desca),
549              (void*)COMPLEXP(b), &one, &one, INTP(descb),
550              &beta,
551              (void*)COMPLEXP(c), &one, &one, INTP(descc));
552 
553   Py_RETURN_NONE;
554 }
555 
pblas_rk(PyObject * self,PyObject * args)556 PyObject* pblas_rk(PyObject *self, PyObject *args)
557 {
558   char* uplo;
559   int n, k;
560   Py_complex alpha;
561   Py_complex beta;
562   PyArrayObject *a, *c;
563   PyArrayObject *desca, *descc;
564   int one = 1;
565 
566   if (!PyArg_ParseTuple(args, "iiDODOOOs", &n, &k, &alpha,
567                         &a, &beta, &c,
568                         &desca, &descc,
569                         &uplo)) {
570     return NULL;
571   }
572 
573   // cdesc
574   // int c_ConTxt = INTP(descc)[1];
575 
576   // If process not on BLACS grid, then return.
577   // if (c_ConTxt == -1) Py_RETURN_NONE;
578 
579   if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
580     pdsyrk_(uplo, "T", &n, &k,
581             &(alpha.real),
582             DOUBLEP(a), &one, &one, INTP(desca),
583             &(beta.real),
584             DOUBLEP(c), &one, &one, INTP(descc));
585   else
586     pzherk_(uplo, "C", &n, &k,
587             &alpha,
588             (void*)COMPLEXP(a), &one, &one, INTP(desca),
589             &beta,
590             (void*)COMPLEXP(c), &one, &one, INTP(descc));
591 
592   Py_RETURN_NONE;
593 }
594 
new_blacs_context(PyObject * self,PyObject * args)595 PyObject* new_blacs_context(PyObject *self, PyObject *args)
596 {
597   PyObject* comm_obj;
598   int nprow, npcol;
599 
600   int iam, nprocs;
601   int ConTxt;
602   char* order;
603 
604   if (!PyArg_ParseTuple(args, "Oiis", &comm_obj, &nprow, &npcol, &order)){
605     return NULL;
606   }
607 
608   // Create blacs grid on this communicator
609   MPI_Comm comm = ((MPIObject*)comm_obj)->comm;
610 
611   // Get my id and nprocs. This is for debugging purposes only
612   Cblacs_pinfo_(&iam, &nprocs);
613   MPI_Comm_size(comm, &nprocs);
614 
615   // Create blacs grid on this communicator continued
616   ConTxt = Csys2blacs_handle_(comm);
617   Cblacs_gridinit_(&ConTxt, order, nprow, npcol);
618   PyObject* returnvalue = Py_BuildValue("i", ConTxt);
619   return returnvalue;
620 }
621 
get_blacs_gridinfo(PyObject * self,PyObject * args)622 PyObject* get_blacs_gridinfo(PyObject *self, PyObject *args)
623 {
624   int ConTxt, nprow, npcol;
625   int myrow, mycol;
626 
627   if (!PyArg_ParseTuple(args, "iii", &ConTxt, &nprow, &npcol)) {
628     return NULL;
629   }
630 
631   Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
632   return Py_BuildValue("(ii)", myrow, mycol);
633 }
634 
635 
get_blacs_local_shape(PyObject * self,PyObject * args)636 PyObject* get_blacs_local_shape(PyObject *self, PyObject *args)
637 {
638   int ConTxt;
639   int m, n, mb, nb, rsrc, csrc;
640   int nprow, npcol, myrow, mycol;
641   int locM, locN;
642 
643   if (!PyArg_ParseTuple(args, "iiiiiii", &ConTxt, &m, &n, &mb,
644                         &nb, &rsrc, &csrc)){
645     return NULL;
646   }
647 
648   Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
649   locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow);
650   locN = numroc_(&n, &nb, &mycol, &csrc, &npcol);
651   return Py_BuildValue("(ii)", locM, locN);
652 }
653 
blacs_destroy(PyObject * self,PyObject * args)654 PyObject* blacs_destroy(PyObject *self, PyObject *args)
655 {
656   int ConTxt;
657   if (!PyArg_ParseTuple(args, "i", &ConTxt))
658     return NULL;
659 
660   Cblacs_gridexit_(ConTxt);
661 
662   Py_RETURN_NONE;
663 }
664 
scalapack_set(PyObject * self,PyObject * args)665 PyObject* scalapack_set(PyObject *self, PyObject *args)
666 {
667   PyArrayObject* a; // matrix;
668   PyArrayObject* desca; // descriptor
669   Py_complex alpha;
670   Py_complex beta;
671   int m, n;
672   int ia, ja;
673   char* uplo;
674 
675   if (!PyArg_ParseTuple(args, "OODDsiiii", &a, &desca,
676                         &alpha, &beta, &uplo,
677                         &m, &n, &ia, &ja))
678     return NULL;
679 
680   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
681     pdlaset_(uplo, &m, &n, &(alpha.real), &(beta.real), DOUBLEP(a),
682              &ia, &ja, INTP(desca));
683   else
684     pzlaset_(uplo, &m, &n, &alpha, &beta, (void*)COMPLEXP(a),
685              &ia, &ja, INTP(desca));
686 
687   Py_RETURN_NONE;
688 }
689 
scalapack_redist(PyObject * self,PyObject * args)690 PyObject* scalapack_redist(PyObject *self, PyObject *args)
691 {
692   PyArrayObject* a; // source matrix
693   PyArrayObject* b; // destination matrix
694   PyArrayObject* desca; // source descriptor
695   PyArrayObject* descb; // destination descriptor
696 
697   char* uplo;
698   char diag='N'; // copy the diagonal
699   int c_ConTxt;
700   int m;
701   int n;
702 
703   int ia, ja, ib, jb;
704 
705   if (!PyArg_ParseTuple(args, "OOOOiiiiiiis",
706                         &desca, &descb,
707                         &a, &b,
708                         &m, &n,
709                         &ia, &ja,
710                         &ib, &jb,
711                         &c_ConTxt,
712                         &uplo))
713     return NULL;
714 
715   if (*uplo == 'G') // General matrix
716     {
717       if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
718         Cpdgemr2d_(m, n,
719                    DOUBLEP(a), ia, ja, INTP(desca),
720                    DOUBLEP(b), ib, jb, INTP(descb),
721                    c_ConTxt);
722       else
723         Cpzgemr2d_(m, n,
724                    (void*)COMPLEXP(a), ia, ja, INTP(desca),
725                    (void*)COMPLEXP(b), ib, jb, INTP(descb),
726                    c_ConTxt);
727     }
728   else // Trapezoidal matrix
729     {
730       if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
731         Cpdtrmr2d_(uplo, &diag, m, n,
732                    DOUBLEP(a), ia, ja, INTP(desca),
733                    DOUBLEP(b), ib, jb, INTP(descb),
734                    c_ConTxt);
735       else
736         Cpztrmr2d_(uplo, &diag, m, n,
737                    (void*)COMPLEXP(a), ia, ja, INTP(desca),
738                    (void*)COMPLEXP(b), ib, jb, INTP(descb),
739                    c_ConTxt);
740     }
741 
742   Py_RETURN_NONE;
743 }
744 
scalapack_diagonalize_dc(PyObject * self,PyObject * args)745 PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args)
746 {
747   // Standard driver for divide and conquer algorithm
748   // Computes all eigenvalues and eigenvectors
749 
750   PyArrayObject* a; // symmetric matrix
751   PyArrayObject* desca; // symmetric matrix description vector
752   PyArrayObject* z; // eigenvector matrix
753   PyArrayObject* w; // eigenvalue array
754   int one = 1;
755 
756   char jobz = 'V'; // eigenvectors also
757   char* uplo;
758 
759   if (!PyArg_ParseTuple(args, "OOsOO", &a, &desca, &uplo, &z, &w))
760     return NULL;
761 
762   // adesc
763   // int a_ConTxt = INTP(desca)[1];
764   int a_m      = INTP(desca)[2];
765   int a_n      = INTP(desca)[3];
766 
767   // zdesc = adesc; this can be relaxed a bit according to pdsyevd.f
768 
769   // Only square matrices
770   assert (a_m == a_n);
771   int n = a_n;
772 
773   // If process not on BLACS grid, then return.
774   // if (a_ConTxt == -1) Py_RETURN_NONE;
775 
776   // Query part, need to find the optimal size of a number of work arrays
777   int info;
778   int querywork = -1;
779   int* iwork;
780   int liwork;
781   int lwork;
782   int lrwork;
783   int i_work;
784   double d_work;
785   double_complex c_work;
786 
787   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
788     {
789       pdsyevd_(&jobz, uplo, &n,
790                DOUBLEP(a), &one, &one, INTP(desca),
791                DOUBLEP(w),
792                DOUBLEP(z), &one,  &one, INTP(desca),
793                &d_work, &querywork, &i_work, &querywork, &info);
794       lwork = (int)(d_work);
795       // Sometimes lwork is not large enough.  Found this formula on
796       // the internet:
797       lwork = MAX(131072, 2 * (int) lwork + 1);
798     }
799   else
800     {
801       pzheevd_(&jobz, uplo, &n,
802                (void*)COMPLEXP(a), &one, &one, INTP(desca),
803                DOUBLEP(w),
804                (void*)COMPLEXP(z), &one,  &one, INTP(desca),
805                (void*)&c_work, &querywork, &d_work, &querywork,
806                &i_work, &querywork, &info);
807       lwork = (int)(c_work);
808       lrwork = (int)(d_work);
809     }
810 
811   if (info != 0)
812     {
813       PyErr_SetString(PyExc_RuntimeError,
814                       "scalapack_diagonalize_dc error in query.");
815       return NULL;
816     }
817 
818   // Computation part
819   liwork = MAX(8 * n, i_work + 1);
820   iwork = GPAW_MALLOC(int, liwork);
821   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
822     {
823       double* work = GPAW_MALLOC(double, lwork);
824       pdsyevd_(&jobz, uplo, &n,
825                DOUBLEP(a), &one, &one, INTP(desca),
826                DOUBLEP(w),
827                DOUBLEP(z), &one, &one, INTP(desca),
828                work, &lwork, iwork, &liwork, &info);
829       free(work);
830     }
831   else
832     {
833       double_complex *work = GPAW_MALLOC(double_complex, lwork);
834       double* rwork = GPAW_MALLOC(double, lrwork);
835       pzheevd_(&jobz, uplo, &n,
836                (void*)COMPLEXP(a), &one, &one, INTP(desca),
837                DOUBLEP(w),
838                (void*)COMPLEXP(z), &one, &one, INTP(desca),
839                (void*)work, &lwork, rwork, &lrwork,
840                iwork, &liwork, &info);
841       free(rwork);
842       free(work);
843     }
844   free(iwork);
845 
846   PyObject* returnvalue = Py_BuildValue("i", info);
847   return returnvalue;
848 }
849 
scalapack_diagonalize_ex(PyObject * self,PyObject * args)850 PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args)
851 {
852   // Standard driver for bisection and inverse iteration algorithm
853   // Computes 'iu' eigenvalues and eigenvectors
854 
855   PyArrayObject* a; // Hamiltonian matrix
856   PyArrayObject* desca; // Hamintonian matrix descriptor
857   PyArrayObject* z; // eigenvector matrix
858   PyArrayObject* w; // eigenvalue array
859   int a_mycol = -1;
860   int a_myrow = -1;
861   int a_nprow, a_npcol;
862   int il = 1;  // not used when range = 'A' or 'V'
863   int iu;
864   int eigvalm, nz;
865   int one = 1;
866 
867   double vl, vu; // not used when range = 'A' or 'I'
868 
869   char jobz = 'V'; // eigenvectors also
870   char range = 'I'; // eigenvalues il-th through iu-th
871   char* uplo;
872 
873   if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu,
874                         &z, &w))
875     return NULL;
876 
877   // a desc
878   int a_ConTxt = INTP(desca)[1];
879   int a_m      = INTP(desca)[2];
880   int a_n      = INTP(desca)[3];
881 
882   // Only square matrices
883   assert (a_m == a_n);
884   int n = a_n;
885 
886   // zdesc = adesc = bdesc; required by pdsyevx.f
887 
888   // If process not on BLACS grid, then return.
889   // if (a_ConTxt == -1) Py_RETURN_NONE;
890 
891   Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
892 
893   // Convergence tolerance
894   double abstol = 1.0e-8;
895   // char cmach = 'U'; // most orthogonal eigenvectors
896   // char cmach = 'S'; // most acccurate eigenvalues
897   // double abstol = pdlamch_(&a_ConTxt, &cmach);     // most orthogonal eigenvectors
898   // double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
899 
900   double orfac = -1.0;
901 
902   // Query part, need to find the optimal size of a number of work arrays
903   int info;
904   int *ifail;
905   ifail = GPAW_MALLOC(int, n);
906   int *iclustr;
907   iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
908   double  *gap;
909   gap = GPAW_MALLOC(double, a_nprow*a_npcol);
910   int querywork = -1;
911   int* iwork;
912   int liwork;
913   int lwork;  // workspace size must be at least 3
914   int lrwork; // workspace size must be at least 3
915   int i_work;
916   double d_work[3];
917   double_complex c_work;
918 
919   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
920     {
921       pdsyevx_(&jobz, &range, uplo, &n,
922                DOUBLEP(a), &one, &one, INTP(desca),
923                &vl, &vu, &il, &iu, &abstol, &eigvalm,
924                &nz, DOUBLEP(w), &orfac,
925                DOUBLEP(z), &one, &one, INTP(desca),
926                d_work, &querywork,  &i_work, &querywork,
927                ifail, iclustr, gap, &info);
928       lwork = MAX(3, (int)(d_work[0]));
929     }
930   else
931     {
932       pzheevx_(&jobz, &range, uplo, &n,
933                (void*)COMPLEXP(a), &one, &one, INTP(desca),
934                &vl, &vu, &il, &iu, &abstol, &eigvalm,
935                &nz, DOUBLEP(w), &orfac,
936                (void*)COMPLEXP(z), &one, &one, INTP(desca),
937                (void*)&c_work, &querywork, d_work, &querywork,
938                &i_work, &querywork,
939                ifail, iclustr, gap, &info);
940       lwork = MAX(3, (int)(c_work));
941       lrwork = MAX(3, (int)(d_work[0]));
942     }
943 
944   if (info != 0) {
945     printf ("info = %d", info);
946     PyErr_SetString(PyExc_RuntimeError,
947                     "scalapack_diagonalize_ex error in query.");
948     return NULL;
949   }
950 
951   // Computation part
952   // lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
953   liwork = i_work;
954   iwork = GPAW_MALLOC(int, liwork);
955 
956   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
957     {
958       double* work = GPAW_MALLOC(double, lwork);
959       pdsyevx_(&jobz, &range, uplo, &n,
960                DOUBLEP(a), &one, &one, INTP(desca),
961                &vl, &vu, &il, &iu, &abstol, &eigvalm,
962                &nz, DOUBLEP(w), &orfac,
963                DOUBLEP(z), &one, &one, INTP(desca),
964                work, &lwork, iwork, &liwork,
965                ifail, iclustr, gap, &info);
966       free(work);
967     }
968   else
969     {
970       double_complex* work = GPAW_MALLOC(double_complex, lwork);
971       double* rwork = GPAW_MALLOC(double, lrwork);
972       pzheevx_(&jobz, &range, uplo, &n,
973                (void*)COMPLEXP(a), &one, &one, INTP(desca),
974                &vl, &vu, &il, &iu, &abstol, &eigvalm,
975                &nz, DOUBLEP(w), &orfac,
976                (void*)COMPLEXP(z), &one, &one, INTP(desca),
977                (void*)work, &lwork, rwork, &lrwork,
978                iwork, &liwork,
979                ifail, iclustr, gap, &info);
980       free(rwork);
981       free(work);
982     }
983   free(iwork);
984   free(gap);
985   free(iclustr);
986   free(ifail);
987 
988   // If this fails, fewer eigenvalues than requested were computed.
989   assert (eigvalm == iu);
990   PyObject* returnvalue = Py_BuildValue("i", info);
991   return returnvalue;
992 }
993 
994 #ifdef GPAW_MR3
scalapack_diagonalize_mr3(PyObject * self,PyObject * args)995 PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args)
996 {
997   // Standard driver for MRRR algorithm
998   // Computes 'iu' eigenvalues and eigenvectors
999   // http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
1000 
1001   PyArrayObject* a; // Hamiltonian matrix
1002   PyArrayObject* desca; // Hamintonian matrix descriptor
1003   PyArrayObject* z; // eigenvector matrix
1004   PyArrayObject* w; // eigenvalue array
1005   int il = 1;  // not used when range = 'A' or 'V'
1006   int iu;
1007   int eigvalm, nz;
1008   int one = 1;
1009 
1010   double vl, vu; // not used when range = 'A' or 'I'
1011 
1012   char jobz = 'V'; // eigenvectors also
1013   char range = 'I'; // eigenvalues il-th through iu-th
1014   char* uplo;
1015 
1016   if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu,
1017                         &z, &w))
1018     return NULL;
1019 
1020   // a desc
1021   // int a_ConTxt = INTP(desca)[1];
1022   int a_m      = INTP(desca)[2];
1023   int a_n      = INTP(desca)[3];
1024 
1025   // Only square matrices
1026   assert (a_m == a_n);
1027   int n = a_n;
1028 
1029   // zdesc = adesc = bdesc; required by pdsyevx.f
1030 
1031   // If process not on BLACS grid, then return.
1032   // if (a_ConTxt == -1) Py_RETURN_NONE;
1033 
1034   // Query part, need to find the optimal size of a number of work arrays
1035   int info;
1036   int querywork = -1;
1037   int* iwork;
1038   int liwork;
1039   int lwork;
1040   int lrwork;
1041   int i_work;
1042   double d_work[3];
1043   double_complex c_work;
1044 
1045   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1046     {
1047       pdsyevr_(&jobz, &range, uplo, &n,
1048                DOUBLEP(a), &one, &one, INTP(desca),
1049                &vl, &vu, &il, &iu, &eigvalm,
1050                &nz, DOUBLEP(w),
1051                DOUBLEP(z), &one, &one, INTP(desca),
1052                d_work, &querywork,  &i_work, &querywork,
1053                &info);
1054       lwork = (int)(d_work[0]);
1055     }
1056   else
1057     {
1058       pzheevr_(&jobz, &range, uplo, &n,
1059                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1060                &vl, &vu, &il, &iu, &eigvalm,
1061                &nz, DOUBLEP(w),
1062                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1063                (void*)&c_work, &querywork, d_work, &querywork,
1064                &i_work, &querywork,
1065                &info);
1066       lwork = (int)(c_work);
1067       lrwork = (int)(d_work[0]);
1068     }
1069 
1070   if (info != 0) {
1071     printf ("info = %d", info);
1072     PyErr_SetString(PyExc_RuntimeError,
1073                     "scalapack_diagonalize_evr error in query.");
1074     return NULL;
1075   }
1076 
1077   // Computation part
1078   liwork = i_work;
1079   iwork = GPAW_MALLOC(int, liwork);
1080 
1081   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1082     {
1083       double* work = GPAW_MALLOC(double, lwork);
1084       pdsyevr_(&jobz, &range, uplo, &n,
1085                DOUBLEP(a), &one, &one, INTP(desca),
1086                &vl, &vu, &il, &iu, &eigvalm,
1087                &nz, DOUBLEP(w),
1088                DOUBLEP(z), &one, &one, INTP(desca),
1089                work, &lwork, iwork, &liwork,
1090                &info);
1091       free(work);
1092     }
1093   else
1094     {
1095       double_complex* work = GPAW_MALLOC(double_complex, lwork);
1096       double* rwork = GPAW_MALLOC(double, lrwork);
1097       pzheevr_(&jobz, &range, uplo, &n,
1098                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1099                &vl, &vu, &il, &iu, &eigvalm,
1100                &nz, DOUBLEP(w),
1101                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1102                (void*)work, &lwork, rwork, &lrwork,
1103                iwork, &liwork,
1104                &info);
1105       free(rwork);
1106       free(work);
1107     }
1108   free(iwork);
1109 
1110   // If this fails, fewer eigenvalues than requested were computed.
1111   assert (eigvalm == iu);
1112   PyObject* returnvalue = Py_BuildValue("i", info);
1113   return returnvalue;
1114 }
1115 #endif
1116 
scalapack_general_diagonalize_dc(PyObject * self,PyObject * args)1117 PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args)
1118 {
1119   // General driver for divide and conquer algorithm
1120   // Computes *all* eigenvalues and eigenvectors
1121 
1122   PyArrayObject* a; // Hamiltonian matrix
1123   PyArrayObject* b; // overlap matrix
1124   PyArrayObject* desca; // Hamintonian matrix descriptor
1125   PyArrayObject* z; // eigenvector matrix
1126   PyArrayObject* w; // eigenvalue array
1127   int ibtype  =  1; // Solve H*psi = lambda*S*psi
1128   int one = 1;
1129 
1130   char jobz = 'V'; // eigenvectors also
1131   char* uplo;
1132 
1133   double scale;
1134 
1135   if (!PyArg_ParseTuple(args, "OOsOOO", &a, &desca, &uplo,
1136                         &b, &z, &w))
1137     return NULL;
1138 
1139   // a desc
1140   // int a_ConTxt = INTP(desca)[1];
1141   int a_m      = INTP(desca)[2];
1142   int a_n      = INTP(desca)[3];
1143 
1144   // Only square matrices
1145   assert (a_m == a_n);
1146   int n = a_n;
1147 
1148   // zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
1149 
1150   // If process not on BLACS grid, then return.
1151   // if (a_ConTxt == -1) Py_RETURN_NONE;
1152 
1153   // Cholesky Decomposition
1154   int info;
1155   if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
1156     pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
1157   else
1158     pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
1159 
1160   if (info != 0)
1161     {
1162       PyErr_SetString(PyExc_RuntimeError,
1163                       "scalapack_general_diagonalize_dc error in Cholesky.");
1164       return NULL;
1165     }
1166 
1167   // Query variables
1168   int querywork = -1;
1169   int* iwork;
1170   int liwork;
1171   int lwork;
1172   int lrwork;
1173   int i_work;
1174   double d_work;
1175   double_complex c_work;
1176   // NGST Query
1177   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1178     {
1179       pdsyngst_(&ibtype, uplo, &n,
1180                 DOUBLEP(a), &one, &one, INTP(desca),
1181                 DOUBLEP(b), &one, &one, INTP(desca),
1182                 &scale, &d_work, &querywork, &info);
1183       lwork = (int)(d_work);
1184     }
1185   else
1186     {
1187       pzhengst_(&ibtype, uplo, &n,
1188                 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1189                 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1190                 &scale, (void*)&c_work, &querywork, &info);
1191       lwork = (int)(c_work);
1192     }
1193 
1194   if (info != 0) {
1195     PyErr_SetString(PyExc_RuntimeError,
1196                     "scalapack_general_diagonalize_dc error in NGST query.");
1197     return NULL;
1198   }
1199   // NGST Compute
1200   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1201     {
1202       double* work = GPAW_MALLOC(double, lwork);
1203       pdsyngst_(&ibtype, uplo, &n,
1204                 DOUBLEP(a), &one, &one, INTP(desca),
1205                 DOUBLEP(b), &one, &one, INTP(desca),
1206                 &scale, work, &lwork, &info);
1207       free(work);
1208     }
1209   else
1210     {
1211       double_complex* work = GPAW_MALLOC(double_complex, lwork);
1212       pzhengst_(&ibtype, uplo, &n,
1213                 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1214                 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1215                 &scale, (void*)work, &lwork, &info);
1216       free(work);
1217     }
1218 
1219   if (info != 0) {
1220     PyErr_SetString(PyExc_RuntimeError,
1221                     "scalapack_general_diagonalize_dc error in NGST compute.");
1222     return NULL;
1223   }
1224 
1225   // NOTE: Scale is always equal to 1.0 above. In future version of ScaLAPACK, we
1226   // may need to rescale eigenvalues by scale. This can be accomplised by using
1227   // the BLAS1 d/zscal. See pdsygvx.f
1228 
1229   // EVD Query
1230   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1231     {
1232       pdsyevd_(&jobz, uplo, &n,
1233                DOUBLEP(a), &one, &one, INTP(desca),
1234                DOUBLEP(w),
1235                DOUBLEP(z), &one,  &one, INTP(desca),
1236                &d_work, &querywork, &i_work, &querywork, &info);
1237       lwork = (int)(d_work);
1238     }
1239   else
1240     {
1241       pzheevd_(&jobz, uplo, &n,
1242                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1243                DOUBLEP(w),
1244                (void*)COMPLEXP(z), &one,  &one, INTP(desca),
1245                (void*)&c_work, &querywork, &d_work, &querywork,
1246                &i_work, &querywork, &info);
1247       lwork = (int)(c_work);
1248       lrwork = (int)(d_work);
1249     }
1250 
1251   if (info != 0)
1252     {
1253       PyErr_SetString(PyExc_RuntimeError,
1254                       "scalapack_general_diagonalize_dc error in EVD query.");
1255       return NULL;
1256     }
1257 
1258   // EVD Computation
1259   liwork = i_work;
1260   iwork = GPAW_MALLOC(int, liwork);
1261   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1262     {
1263       double* work = GPAW_MALLOC(double, lwork);
1264       pdsyevd_(&jobz, uplo, &n,
1265                DOUBLEP(a), &one, &one, INTP(desca),
1266                DOUBLEP(w),
1267                DOUBLEP(z), &one, &one, INTP(desca),
1268                work, &lwork, iwork, &liwork, &info);
1269       free(work);
1270     }
1271   else
1272     {
1273       double_complex *work = GPAW_MALLOC(double_complex, lwork);
1274       double* rwork = GPAW_MALLOC(double, lrwork);
1275       pzheevd_(&jobz, uplo, &n,
1276                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1277                DOUBLEP(w),
1278                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1279                (void*)work, &lwork, rwork, &lrwork,
1280                iwork, &liwork, &info);
1281       free(rwork);
1282       free(work);
1283     }
1284   free(iwork);
1285 
1286   // Backtransformation to the original problem
1287   char trans;
1288   double d_one = 1.0;
1289   double_complex c_one = 1.0;
1290 
1291   if (*uplo == 'U')
1292     trans = 'N';
1293   else
1294     trans = 'T';
1295 
1296   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1297     pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one,
1298             DOUBLEP(b), &one, &one, INTP(desca),
1299             DOUBLEP(z), &one, &one, INTP(desca));
1300   else
1301     pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one,
1302             (void*)COMPLEXP(b), &one, &one, INTP(desca),
1303             (void*)COMPLEXP(z), &one, &one, INTP(desca));
1304 
1305   PyObject* returnvalue = Py_BuildValue("i", info);
1306   return returnvalue;
1307 }
1308 
scalapack_general_diagonalize_ex(PyObject * self,PyObject * args)1309 PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args)
1310 {
1311   // General driver for bisection and inverse iteration algorithm
1312   // Computes 'iu' eigenvalues and eigenvectors
1313 
1314   PyArrayObject* a; // Hamiltonian matrix
1315   PyArrayObject* b; // overlap matrix
1316   PyArrayObject* desca; // Hamintonian matrix descriptor
1317   PyArrayObject* z; // eigenvector matrix
1318   PyArrayObject* w; // eigenvalue array
1319   int ibtype  =  1; // Solve H*psi = lambda*S*psi
1320   int a_mycol = -1;
1321   int a_myrow = -1;
1322   int a_nprow, a_npcol;
1323   int il = 1;  // not used when range = 'A' or 'V'
1324   int iu;     //
1325   int eigvalm, nz;
1326   int one = 1;
1327 
1328   double vl, vu; // not used when range = 'A' or 'I'
1329 
1330   char jobz = 'V'; // eigenvectors also
1331   char range = 'I'; // eigenvalues il-th through iu-th
1332   char* uplo;
1333 
1334   if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu,
1335                         &b, &z, &w))
1336     return NULL;
1337 
1338   // a desc
1339   int a_ConTxt = INTP(desca)[1];
1340   int a_m      = INTP(desca)[2];
1341   int a_n      = INTP(desca)[3];
1342 
1343   // Only square matrices
1344   assert (a_m == a_n);
1345   int n = a_n;
1346 
1347   // zdesc = adesc = bdesc; required by pdsygvx.f
1348 
1349   // If process not on BLACS grid, then return.
1350   // if (a_ConTxt == -1) Py_RETURN_NONE;
1351 
1352   Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
1353 
1354   // Convergence tolerance
1355   double abstol = 1.0e-8;
1356   // char cmach = 'U'; // most orthogonal eigenvectors
1357   // char cmach = 'S'; // most acccurate eigenvalues
1358   // double abstol = pdlamch_(&a_ConTxt, &cmach);     // most orthogonal eigenvectors
1359   // double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
1360 
1361   double orfac = -1.0;
1362 
1363   // Query part, need to find the optimal size of a number of work arrays
1364   int info;
1365   int *ifail;
1366   ifail = GPAW_MALLOC(int, n);
1367   int *iclustr;
1368   iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
1369   double  *gap;
1370   gap = GPAW_MALLOC(double, a_nprow*a_npcol);
1371   int querywork = -1;
1372   int* iwork;
1373   int liwork;
1374   int lwork;  // workspace size must be at least 3
1375   int lrwork; // workspace size must be at least 3
1376   int i_work;
1377   double d_work[3];
1378   double_complex c_work;
1379 
1380   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1381     {
1382       pdsygvx_(&ibtype, &jobz, &range, uplo, &n,
1383                DOUBLEP(a), &one, &one, INTP(desca),
1384                DOUBLEP(b), &one, &one, INTP(desca),
1385                &vl, &vu, &il, &iu, &abstol, &eigvalm,
1386                &nz, DOUBLEP(w), &orfac,
1387                DOUBLEP(z),  &one, &one, INTP(desca),
1388                d_work, &querywork, &i_work, &querywork,
1389                ifail, iclustr, gap, &info);
1390       lwork = MAX(3, (int)(d_work[0]));
1391     }
1392   else
1393     {
1394       pzhegvx_(&ibtype, &jobz, &range, uplo, &n,
1395                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1396                (void*)COMPLEXP(b), &one, &one, INTP(desca),
1397                &vl, &vu, &il, &iu, &abstol, &eigvalm,
1398                &nz, DOUBLEP(w), &orfac,
1399                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1400                (void*)&c_work, &querywork, d_work, &querywork,
1401                &i_work, &querywork,
1402                ifail, iclustr, gap, &info);
1403       lwork = MAX(3, (int)(c_work));
1404       lrwork = MAX(3, (int)(d_work[0]));
1405     }
1406   if (info != 0) {
1407     PyErr_SetString(PyExc_RuntimeError,
1408                     "scalapack_general_diagonalize_ex error in query.");
1409     return NULL;
1410   }
1411 
1412   // Computation part
1413   // lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
1414   liwork = i_work;
1415   iwork = GPAW_MALLOC(int, liwork);
1416   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1417     {
1418       double* work = GPAW_MALLOC(double, lwork);
1419       pdsygvx_(&ibtype, &jobz, &range, uplo, &n,
1420                DOUBLEP(a), &one, &one, INTP(desca),
1421                DOUBLEP(b), &one, &one, INTP(desca),
1422                &vl, &vu, &il, &iu, &abstol, &eigvalm,
1423                &nz, DOUBLEP(w), &orfac,
1424                DOUBLEP(z), &one, &one,  INTP(desca),
1425                work, &lwork,  iwork, &liwork,
1426                ifail, iclustr, gap, &info);
1427     free(work);
1428     }
1429   else
1430     {
1431       double_complex* work = GPAW_MALLOC(double_complex, lwork);
1432       double* rwork = GPAW_MALLOC(double, lrwork);
1433       pzhegvx_(&ibtype, &jobz, &range, uplo, &n,
1434                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1435                (void*)COMPLEXP(b), &one, &one, INTP(desca),
1436                &vl, &vu, &il, &iu, &abstol, &eigvalm,
1437                &nz, DOUBLEP(w), &orfac,
1438                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1439                (void*)work, &lwork, rwork, &lrwork,
1440                iwork, &liwork,
1441                ifail, iclustr, gap, &info);
1442       free(rwork);
1443       free(work);
1444   }
1445   free(iwork);
1446   free(gap);
1447   free(iclustr);
1448   free(ifail);
1449 
1450   // If this fails, fewer eigenvalues than requested were computed.
1451   assert (eigvalm == iu);
1452   PyObject* returnvalue = Py_BuildValue("i", info);
1453   return returnvalue;
1454 }
1455 
1456 #ifdef GPAW_MR3
scalapack_general_diagonalize_mr3(PyObject * self,PyObject * args)1457 PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args)
1458 {
1459   // General driver for MRRR algorithm
1460   // Computes 'iu' eigenvalues and eigenvectors
1461   // http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
1462 
1463   PyArrayObject* a; // Hamiltonian matrix
1464   PyArrayObject* b; // overlap matrix
1465   PyArrayObject* desca; // Hamintonian matrix descriptor
1466   PyArrayObject* z; // eigenvector matrix
1467   PyArrayObject* w; // eigenvalue array
1468   int ibtype  =  1; // Solve H*psi = lambda*S*psi
1469   int il = 1;  // not used when range = 'A' or 'V'
1470   int iu;
1471   int eigvalm, nz;
1472   int one = 1;
1473 
1474   double vl, vu; // not used when range = 'A' or 'I'
1475 
1476   char jobz = 'V'; // eigenvectors also
1477   char range = 'I'; // eigenvalues il-th through iu-th
1478   char* uplo;
1479 
1480   double scale;
1481 
1482   if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu,
1483                         &b, &z, &w))
1484     return NULL;
1485 
1486   // a desc
1487   // int a_ConTxt = INTP(desca)[1];
1488   int a_m      = INTP(desca)[2];
1489   int a_n      = INTP(desca)[3];
1490 
1491   // Only square matrices
1492   assert (a_m == a_n);
1493   int n = a_n;
1494 
1495   // zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
1496 
1497   // If process not on BLACS grid, then return.
1498   // if (a_ConTxt == -1) Py_RETURN_NONE;
1499 
1500   // Cholesky Decomposition
1501   int info;
1502   if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
1503     pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
1504   else
1505     pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
1506 
1507   if (info != 0)
1508     {
1509       PyErr_SetString(PyExc_RuntimeError,
1510                       "scalapack_general_diagonalize_mr3 error in Cholesky.");
1511       return NULL;
1512     }
1513 
1514   // Query variables
1515   int querywork = -1;
1516   int* iwork;
1517   int liwork;
1518   int lwork;
1519   int lrwork;
1520   int i_work;
1521   double d_work[3];
1522   double_complex c_work;
1523   // NGST Query
1524   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1525     {
1526       pdsyngst_(&ibtype, uplo, &n,
1527                 DOUBLEP(a), &one, &one, INTP(desca),
1528                 DOUBLEP(b), &one, &one, INTP(desca),
1529                 &scale, d_work, &querywork, &info);
1530       lwork = (int)(d_work[0]);
1531     }
1532   else
1533     {
1534       pzhengst_(&ibtype, uplo, &n,
1535                 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1536                 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1537                 &scale, (void*)&c_work, &querywork, &info);
1538       lwork = (int)(c_work);
1539     }
1540 
1541   if (info != 0) {
1542     PyErr_SetString(PyExc_RuntimeError,
1543                     "scalapack_general_diagonalize_mr3 error in NGST query.");
1544     return NULL;
1545   }
1546 
1547   // NGST Compute
1548   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1549     {
1550       double* work = GPAW_MALLOC(double, lwork);
1551       pdsyngst_(&ibtype, uplo, &n,
1552                 DOUBLEP(a), &one, &one, INTP(desca),
1553                 DOUBLEP(b), &one, &one, INTP(desca),
1554                 &scale, work, &lwork, &info);
1555       free(work);
1556     }
1557   else
1558     {
1559       double_complex* work = GPAW_MALLOC(double_complex, lwork);
1560       pzhengst_(&ibtype, uplo, &n,
1561                 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1562                 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1563                 &scale, (void*)work, &lwork, &info);
1564       free(work);
1565     }
1566 
1567   if (info != 0) {
1568     PyErr_SetString(PyExc_RuntimeError,
1569                     "scalapack_general_diagonalize_mr3 error in NGST compute.");
1570     return NULL;
1571   }
1572 
1573   // NOTE: Scale is always equal to 1.0 above. In future version of ScaLAPACK, we
1574   // may need to rescale eigenvalues by scale. This can be accomplised by using
1575   // the BLAS1 d/zscal. See pdsygvx.f
1576 
1577   // EVR Query
1578   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1579     {
1580       pdsyevr_(&jobz, &range, uplo, &n,
1581                DOUBLEP(a), &one, &one, INTP(desca),
1582                &vl, &vu, &il, &iu, &eigvalm,
1583                &nz, DOUBLEP(w),
1584                DOUBLEP(z), &one, &one, INTP(desca),
1585                d_work, &querywork,  &i_work, &querywork,
1586                &info);
1587       lwork = (int)(d_work[0]);
1588     }
1589   else
1590     {
1591       pzheevr_(&jobz, &range, uplo, &n,
1592                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1593                &vl, &vu, &il, &iu, &eigvalm,
1594                &nz, DOUBLEP(w),
1595                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1596                (void*)&c_work, &querywork, d_work, &querywork,
1597                &i_work, &querywork,
1598                &info);
1599       lwork = (int)(c_work);
1600       lrwork = (int)(d_work[0]);
1601     }
1602 
1603   if (info != 0) {
1604     printf ("info = %d", info);
1605     PyErr_SetString(PyExc_RuntimeError,
1606                     "scalapack_general_diagonalize_evr error in query.");
1607     return NULL;
1608   }
1609 
1610   // EVR Computation
1611   liwork = i_work;
1612   iwork = GPAW_MALLOC(int, liwork);
1613   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1614     {
1615       double* work = GPAW_MALLOC(double, lwork);
1616       pdsyevr_(&jobz, &range, uplo, &n,
1617                DOUBLEP(a), &one, &one, INTP(desca),
1618                &vl, &vu, &il, &iu, &eigvalm,
1619                &nz, DOUBLEP(w),
1620                DOUBLEP(z), &one, &one, INTP(desca),
1621                work, &lwork, iwork, &liwork,
1622                &info);
1623       free(work);
1624     }
1625   else
1626     {
1627       double_complex* work = GPAW_MALLOC(double_complex, lwork);
1628       double* rwork = GPAW_MALLOC(double, lrwork);
1629       pzheevr_(&jobz, &range, uplo, &n,
1630                (void*)COMPLEXP(a), &one, &one, INTP(desca),
1631                &vl, &vu, &il, &iu, &eigvalm,
1632                &nz, DOUBLEP(w),
1633                (void*)COMPLEXP(z), &one, &one, INTP(desca),
1634                (void*)work, &lwork, rwork, &lrwork,
1635                iwork, &liwork,
1636                &info);
1637       free(rwork);
1638       free(work);
1639     }
1640   free(iwork);
1641 
1642   // Backtransformation to the original problem
1643   char trans;
1644   double d_one = 1.0;
1645   double_complex c_one = 1.0;
1646 
1647   if (*uplo == 'U')
1648     trans = 'N';
1649   else
1650     trans = 'T';
1651 
1652   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1653     pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one,
1654             DOUBLEP(b), &one, &one, INTP(desca),
1655             DOUBLEP(z), &one, &one, INTP(desca));
1656   else
1657     pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one,
1658             (void*)COMPLEXP(b), &one, &one, INTP(desca),
1659             (void*)COMPLEXP(z), &one, &one, INTP(desca));
1660 
1661   // If this fails, fewer eigenvalues than requested were computed.
1662   assert (eigvalm == iu);
1663   PyObject* returnvalue = Py_BuildValue("i", info);
1664   return returnvalue;
1665 }
1666 #endif
1667 
scalapack_inverse_cholesky(PyObject * self,PyObject * args)1668 PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args)
1669 {
1670   // Cholesky plus inverse of triangular matrix
1671 
1672   PyArrayObject* a; // overlap matrix
1673   PyArrayObject* desca; // symmetric matrix description vector
1674   int info;
1675   double d_zero = 0.0;
1676   double_complex c_zero = 0.0;
1677   int one = 1;
1678   int two = 2;
1679 
1680   char diag = 'N'; // non-unit triangular
1681   char* uplo;
1682 
1683   if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
1684     return NULL;
1685 
1686   // adesc
1687   // int a_ConTxt = INTP(desca)[1];
1688   int a_m      = INTP(desca)[2];
1689   int a_n      = INTP(desca)[3];
1690 
1691   // Only square matrices
1692   assert (a_m == a_n);
1693   int n = a_n;
1694   int p = a_n - 1;
1695 
1696   // If process not on BLACS grid, then return.
1697   // if (a_ConTxt == -1) Py_RETURN_NONE;
1698 
1699   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1700     {
1701       pdpotrf_(uplo, &n, DOUBLEP(a), &one, &one,
1702                INTP(desca), &info);
1703       if (info == 0)
1704         {
1705           pdtrtri_(uplo, &diag, &n, DOUBLEP(a), &one, &one,
1706                    INTP(desca), &info);
1707           if (*uplo == 'L')
1708             pdlaset_("U", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
1709                      &one, &two, INTP(desca));
1710           else
1711             pdlaset_("L", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
1712                  &two, &one, INTP(desca));
1713         }
1714     }
1715   else
1716     {
1717       pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one,
1718                INTP(desca), &info);
1719       if (info == 0)
1720         {
1721           pztrtri_(uplo, &diag, &n, (void*)COMPLEXP(a), &one, &one,
1722                    INTP(desca), &info);
1723           if (*uplo == 'L')
1724             pzlaset_("U", &p, &p, (void*)&c_zero, (void*)&c_zero,
1725                      (void*)COMPLEXP(a), &one, &two, INTP(desca));
1726           else
1727             pzlaset_("L", &p, &p, (void*)&c_zero, (void*)&c_zero,
1728                      (void*)COMPLEXP(a), &two, &one, INTP(desca));
1729         }
1730     }
1731 
1732   PyObject* returnvalue = Py_BuildValue("i", info);
1733   return returnvalue;
1734 }
1735 
scalapack_inverse(PyObject * self,PyObject * args)1736 PyObject* scalapack_inverse(PyObject *self, PyObject *args)
1737 {
1738   // Inverse of an hermitean matrix
1739   PyArrayObject* a; // Matrix
1740   PyArrayObject* desca; // Matrix description vector
1741   char* uplo;
1742   int info;
1743   int one = 1;
1744   if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
1745     return NULL;
1746 
1747   int a_m      = INTP(desca)[2];
1748   int a_n      = INTP(desca)[3];
1749   // Only square matrices
1750   assert (a_m == a_n);
1751 
1752   int n = a_n;
1753 
1754   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1755      {
1756       assert(1==-1);       // No double version implemented
1757      }
1758   else
1759     {
1760       pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
1761       if (info == 0)
1762       {
1763         pzpotri_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
1764       }
1765     }
1766   PyObject* returnvalue = Py_BuildValue("i", info);
1767   return returnvalue;
1768 }
1769 
1770 /*
1771 PyObject* scalapack_solve(PyObject *self, PyObject *args)
1772 {
1773   // Solves equation Ax = B, where A is a general matrix
1774   PyArrayObject* a; // Matrix
1775   PyArrayObject* desca; // Matrix description vector
1776   PyArrayObject* b; // Matrix
1777   PyArrayObject* descb; // Matrix description vector
1778   char uplo;
1779   int info;
1780   int one = 1;
1781   if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
1782     return NULL;
1783 
1784   int a_m      = INTP(desca)[2];
1785   int a_n      = INTP(desca)[3];
1786   // Only square matrices
1787   assert (a_m == a_n);
1788 
1789   int b_m      = INTP(descb)[2];
1790   int b_n      = INTP(descb)[3];
1791   // Equation valid
1792   assert (a_n == b_m);
1793 
1794   int n = a_n;
1795   int nrhs = b_n;
1796 
1797   int* pivot = GPAW_MALLOC(int, a_m+2000); // TODO: How long should this exaclty be?
1798 
1799   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1800      {
1801       assert(1==-1);       // No double version implemented
1802      }
1803   else
1804     {
1805        pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
1806                (void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
1807     }
1808   free(pivot);
1809   PyObject* returnvalue = Py_BuildValue("i", info);
1810   return returnvalue;
1811 }
1812 */
1813 
scalapack_solve(PyObject * self,PyObject * args)1814 PyObject* scalapack_solve(PyObject *self, PyObject *args) {
1815   // Solves equation Ax = B, where A is a general matrix
1816   PyArrayObject* a; // Matrix
1817   PyArrayObject* desca; // Matrix description vector
1818   PyArrayObject* b; // Matrix
1819   PyArrayObject* descb; // Matrix description vector
1820   int info;
1821   int one = 1;
1822   if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
1823     return NULL;
1824 
1825   int a_ConTxt = INTP(desca)[1];
1826   int a_m      = INTP(desca)[2];
1827   int a_n      = INTP(desca)[3];
1828   int a_mb     = INTP(desca)[4];
1829   // Only square matrices
1830   assert (a_m == a_n);
1831 
1832   int b_m      = INTP(descb)[2];
1833   int b_n      = INTP(descb)[3];
1834   // Equation valid
1835   assert (a_n == b_m);
1836 
1837   int n = a_n;
1838   int nrhs = b_n;
1839 
1840   int nprow, npcol, myrow, mycol, locM;
1841 
1842   Cblacs_gridinfo_(a_ConTxt, &nprow, &npcol, &myrow, &mycol);
1843   // LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
1844   locM = (((a_m/a_mb) + 1)/nprow + 1) * a_mb;
1845 
1846   /*
1847    *  IPIV    (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A )
1848    *          This array contains the pivoting information.
1849    *          IPIV(i) -> The global row local row i was swapped with.
1850    *          This array is tied to the distributed matrix A.
1851 
1852    *  An upper bound for these quantities may be computed by:
1853    *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
1854 
1855    *  M_A    (global) DESCA( M_ )    The number of rows in the global
1856    *                                 array A.
1857 
1858    *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
1859    *                                 the rows of the array.
1860 
1861    *  NPROW   (global input) INTEGER
1862    *          NPROW specifies the number of process rows in the grid
1863    *          to be created.
1864    */
1865 
1866   int* pivot = GPAW_MALLOC(int, locM + a_mb);
1867 
1868   //if (a->descr->type_num == PyArray_DOUBLE)
1869   if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1870     {
1871       pdgesv_(&n, &nrhs,(double*)DOUBLEP(a), &one, &one, INTP(desca), pivot,
1872               (double*)DOUBLEP(b), &one, &one, INTP(descb), &info);
1873     }
1874   else
1875     {
1876       pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
1877               (void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
1878     }
1879   free(pivot);
1880   PyObject* returnvalue = Py_BuildValue("i", info);
1881   return returnvalue;
1882 }
1883 
1884 #endif
1885 #endif // PARALLEL
1886