1 /* Copyright (C) 2003-2007 CAMP
2 * Copyright (C) 2007-2009 CAMd
3 * Copyright (C) 2010 Argonne National Laboratory
4 * Please see the accompanying LICENSE file for further information. */
5
6 #ifdef PARALLEL
7 #include <Python.h>
8 #ifdef GPAW_WITH_SL
9 #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
10 #define NO_IMPORT_ARRAY
11 #include <numpy/arrayobject.h>
12 #include <stdlib.h>
13 #include <mpi.h>
14 #include <structmember.h>
15 #include "extensions.h"
16 #include "mympi.h"
17
18 // BLACS
19 #define BLOCK_CYCLIC_2D 1
20
21 #ifdef GPAW_NO_UNDERSCORE_CBLACS
22 #define Cblacs_barrier_ Cblacs_barrier
23 #define Cblacs_gridexit_ Cblacs_gridexit
24 #define Cblacs_gridinfo_ Cblacs_gridinfo
25 #define Cblacs_gridinit_ Cblacs_gridinit
26 #define Cblacs_pinfo_ Cblacs_pinfo
27 #define Csys2blacs_handle_ Csys2blacs_handle
28 #endif
29
30 void Cblacs_barrier_(int ConTxt, char *scope);
31
32 void Cblacs_gridexit_(int ConTxt);
33
34 void Cblacs_gridinfo_(int ConTxt, int* nprow, int* npcol,
35 int* myrow, int* mycol);
36
37 void Cblacs_gridinit_(int* ConTxt, char* order, int nprow, int npcol);
38
39 void Cblacs_pinfo_(int* mypnum, int* nprocs);
40
41 int Csys2blacs_handle_(MPI_Comm SysCtxt);
42 // End of BLACS
43
44 // ScaLAPACK
45 #ifdef GPAW_NO_UNDERSCORE_SCALAPACK
46 #define numroc_ numroc
47 #define pdlamch_ pdlamch
48 #define pdlaset_ pdlaset
49 #define pzlaset_ pzlaset
50
51 #define pdpotrf_ pdpotrf
52 #define pzpotrf_ pzpotrf
53 #define pzpotri_ pzpotri
54 #define pdtrtri_ pdtrtri
55 #define pztrtri_ pztrtri
56
57 #define pzgesv_ pzgesv
58 #define pdgesv_ pdgesv
59
60 #define pdsyevd_ pdsyevd
61 #define pzheevd_ pzheevd
62 #define pdsyevx_ pdsyevx
63 #define pzheevx_ pzheevx
64 #define pdsygvx_ pdsygvx
65 #define pzhegvx_ pzhegvx
66 #define pdsyngst_ pdsyngst
67 #define pzhengst_ pzhengst
68 #ifdef GPAW_MR3
69 #define pdsyevr_ pdsyevr
70 #define pzheevr_ pzheevr
71 #endif // GPAW_MR3
72
73 #define pdtran_ pdtran
74 #define pztranc_ pztranc
75 #define pztranu_ pztranu
76 #define pdgemm_ pdgemm
77 #define pzgemm_ pzgemm
78 #define pdgemv_ pdgemv
79 #define pzgemv_ pzgemv
80 #define pdsyr2k_ pdsyr2k
81 #define pzher2k_ pzher2k
82 #define pdsyrk_ pdsyrk
83 #define pzherk_ pzherk
84 #define pdtrsm_ pdtrsm
85 #define pztrsm_ pztrsm
86
87 #define pzhemm_ pzhemm
88 #define pzsymm_ pzsymm
89 #define pdsymm_ pdsymm
90
91 #endif
92
93 #ifdef GPAW_NO_UNDERSCORE_CSCALAPACK
94 #define Cpdgemr2d_ Cpdgemr2d
95 #define Cpzgemr2d_ Cpzgemr2d
96 #define Cpdtrmr2d_ Cpdtrmr2d
97 #define Cpztrmr2d_ Cpztrmr2d
98 #endif
99
100 // tools
101 int numroc_(int* n, int* nb, int* iproc, int* isrcproc, int* nprocs);
102
103 void Cpdgemr2d_(int m, int n,
104 double* a, int ia, int ja, int* desca,
105 double* b, int ib, int jb, int* descb,
106 int gcontext);
107
108 void Cpzgemr2d_(int m, int n,
109 void* a, int ia, int ja, int* desca,
110 void* b, int ib, int jb, int* descb,
111 int gcontext);
112
113 void Cpdtrmr2d_(char* uplo, char* diag, int m, int n,
114 double* a, int ia, int ja, int* desca,
115 double* b, int ib, int jb, int* descb,
116 int gcontext);
117
118 void Cpztrmr2d_(char* uplo, char* diag, int m, int n,
119 void* a, int ia, int ja, int* desca,
120 void* b, int ib, int jb, int* descb,
121 int gcontext);
122
123 double pdlamch_(int* ictxt, char* cmach);
124
125 void pzpotri_(char* uplo, int* n, void* a, int *ia, int* ja, int* desca, int* info);
126
127 void pzgetri_(int* n, void* a,
128 int *ia, int* ja, int* desca, int* info);
129
130 void pdlaset_(char* uplo, int* m, int* n, double* alpha, double* beta,
131 double* a, int* ia, int* ja, int* desca);
132
133 void pzlaset_(char* uplo, int* m, int* n, void* alpha, void* beta,
134 void* a, int* ia, int* ja, int* desca);
135
136 // cholesky
137 void pdpotrf_(char* uplo, int* n, double* a,
138 int* ia, int* ja, int* desca, int* info);
139
140 void pzpotrf_(char* uplo, int* n, void* a,
141 int* ia, int* ja, int* desca, int* info);
142
143 void pzgesv_(int* n, int* nrhs, void* a,
144 int* ia, int* ja, int* desca, int* ipiv,
145 void* b, int* ib, int* jb, int* descb, int* info);
146
147 void pdgesv_(int *n, int *nrhs, void *a,
148 int *ia, int *ja, int* desca, int *ipiv,
149 void* b, int* ib, int* jb, int* descb, int* info);
150
151
152 void pdtrtri_(char* uplo, char* diag, int* n, double* a,
153 int *ia, int* ja, int* desca, int* info);
154
155 void pztrtri_(char* uplo, char* diag, int* n, void* a,
156 int *ia, int* ja, int* desca, int* info);
157
158 // diagonalization
159 void pdsyevd_(char* jobz, char* uplo, int* n,
160 double* a, int* ia, int* ja, int* desca,
161 double* w, double* z, int* iz, int* jz,
162 int* descz, double* work, int* lwork, int* iwork,
163 int* liwork, int* info);
164
165 void pzheevd_(char* jobz, char* uplo, int* n,
166 void* a, int* ia, int* ja, int* desca,
167 double* w, void* z, int* iz, int* jz,
168 int* descz, void* work, int* lwork, double* rwork,
169 int* lrwork, int* iwork, int* liwork, int* info);
170
171 void pdsyevx_(char* jobz, char* range,
172 char* uplo, int* n,
173 double* a, int* ia, int* ja, int* desca,
174 double* vl, double* vu,
175 int* il, int* iu, double* abstol,
176 int* m, int* nz, double* w, double* orfac,
177 double* z, int* iz, int* jz, int* descz,
178 double* work, int* lwork, int* iwork, int* liwork,
179 int* ifail, int* iclustr, double* gap, int* info);
180
181 void pzheevx_(char* jobz, char* range,
182 char* uplo, int* n,
183 void* a, int* ia, int* ja, int* desca,
184 double* vl, double* vu,
185 int* il, int* iu, double* abstol,
186 int* m, int* nz, double* w, double* orfac,
187 void* z, int* iz, int* jz, int* descz,
188 void* work, int* lwork, double* rwork, int* lrwork,
189 int* iwork, int* liwork,
190 int* ifail, int* iclustr, double* gap, int* info);
191
192 void pdsygvx_(int* ibtype, char* jobz, char* range,
193 char* uplo, int* n,
194 double* a, int* ia, int* ja, int* desca,
195 double* b, int *ib, int* jb, int* descb,
196 double* vl, double* vu,
197 int* il, int* iu, double* abstol,
198 int* m, int* nz, double* w, double* orfac,
199 double* z, int* iz, int* jz, int* descz,
200 double* work, int* lwork, int* iwork, int* liwork,
201 int* ifail, int* iclustr, double* gap, int* info);
202
203 void pzhegvx_(int* ibtype, char* jobz, char* range,
204 char* uplo, int* n,
205 void* a, int* ia, int* ja, int* desca,
206 void* b, int *ib, int* jb, int* descb,
207 double* vl, double* vu,
208 int* il, int* iu, double* abstol,
209 int* m, int* nz, double* w, double* orfac,
210 void* z, int* iz, int* jz, int* descz,
211 void* work, int* lwork, double* rwork, int* lrwork,
212 int* iwork, int* liwork,
213 int* ifail, int* iclustr, double* gap, int* info);
214
215 void pdsyngst_(int* ibtype, char* uplo, int* n,
216 double* a, int* ia, int* ja, int* desca,
217 double* b, int* ib, int* jb, int* descb,
218 double* scale, double* work, int* lwork, int* info);
219
220 void pzhengst_(int* ibtype, char* uplo, int* n,
221 void* a, int* ia, int* ja, int* desca,
222 void* b, int* ib, int* jb, int* descb,
223 double* scale, void* work, int* lwork, int* info);
224
225 #ifdef GPAW_MR3
226 void pdsyevr_(char* jobz, char* range,
227 char* uplo, int* n,
228 double* a, int* ia, int* ja, int* desca,
229 double* vl, double* vu,
230 int* il, int* iu,
231 int* m, int* nz, double* w,
232 double* z, int* iz, int* jz, int* descz,
233 double* work, int* lwork, int* iwork, int* liwork,
234 int* info);
235
236 void pzheevr_(char* jobz, char* range,
237 char* uplo, int* n,
238 void* a, int* ia, int* ja, int* desca,
239 double* vl, double* vu,
240 int* il, int* iu,
241 int* m, int* nz, double* w,
242 void* z, int* iz, int* jz, int* descz,
243 void* work, int* lwork, double* rwork, int* lrwork,
244 int* iwork, int* liwork,
245 int* info);
246 #endif // GPAW_MR3
247
248 // pblas
249 void pdtran_(int* m, int* n,
250 double* alpha,
251 double* a, int* ia, int* ja, int* desca,
252 double* beta,
253 double* c, int* ic, int* jc, int* descc);
254
255 void pztranc_(int* m, int* n,
256 void* alpha,
257 void* a, int* ia, int* ja, int* desca,
258 void* beta,
259 void* c, int* ic, int* jc, int* descc);
260
261 void pztranu_(int* m, int* n,
262 void* alpha,
263 void* a, int* ia, int* ja, int* desca,
264 void* beta,
265 void* c, int* ic, int* jc, int* descc);
266
267 void pdgemm_(char* transa, char* transb, int* m, int* n, int* k,
268 double* alpha,
269 double* a, int* ia, int* ja, int* desca,
270 double* b, int* ib, int* jb, int* descb,
271 double* beta,
272 double* c, int* ic, int* jc, int* descc);
273
274 void pzgemm_(char* transa, char* transb, int* m, int* n, int* k,
275 void* alpha,
276 void* a, int* ia, int* ja, int* desca,
277 void* b, int* ib, int* jb, int* descb,
278 void* beta,
279 void* c, int* ic, int* jc, int* descc);
280
281 void pzhemm_(char* side, char* uplo, int* m, int* n,
282 void* alpha,
283 void* a, int* ia, int* ja, int* desca,
284 void* b, int* ib, int* jb, int* descb,
285 void* beta,
286 void* c, int* ic, int* jc, int* descc);
287
288 void pzsymm_(char* side, char* uplo, int* m, int* n,
289 void* alpha,
290 void* a, int* ia, int* ja, int* desca,
291 void* b, int* ib, int* jb, int* descb,
292 void* beta,
293 void* c, int* ic, int* jc, int* descc);
294
295 void pdsymm_(char* side, char* uplo, int* m, int* n,
296 void* alpha,
297 void* a, int* ia, int* ja, int* desca,
298 void* b, int* ib, int* jb, int* descb,
299 void* beta,
300 void* c, int* ic, int* jc, int* descc);
301
302 void pdgemv_(char* transa, int* m, int* n, double* alpha,
303 double* a, int* ia, int* ja, int* desca,
304 double* x, int* ix, int* jx, int* descx, int* incx,
305 double* beta,
306 double* y, int* iy, int* jy, int* descy, int* incy);
307
308 void pzgemv_(char* transa, int* m, int* n, void* alpha,
309 void* a, int* ia, int* ja, int* desca,
310 void* x, int* ix, int* jx, int* descx, int* incx,
311 void* beta,
312 void* y, int* iy, int* jy, int* descy, int* incy);
313
314 void pdsyr2k_(char* uplo, char* trans, int* n, int* k,
315 double* alpha,
316 double* a, int* ia, int* ja, int* desca,
317 double* b, int* ib, int* jb, int* descb,
318 double* beta,
319 double* c, int* ic, int *jc, int* descc);
320
321 void pzher2k_(char* uplo, char* trans, int* n, int* k,
322 void* alpha,
323 void* a, int* ia, int* ja, int* desca,
324 void* b, int* ib, int* jb, int* descb,
325 void* beta,
326 void* c, int* ic, int* jc, int* descc);
327
328 void pdsyrk_(char* uplo, char* trans, int* n, int* k,
329 double* alpha,
330 double* a, int* ia, int* ja, int* desca,
331 double* beta,
332 double* c, int* ic, int* jc, int* descc);
333
334 void pzherk_(char* uplo, char* trans, int* n, int* k,
335 void* alpha,
336 void* a, int* ia, int* ja, int* desca,
337 void* beta,
338 void* c, int* ic, int* jc, int* descc);
339
340 void pdtrsm_(char* side, char* uplo, char* trans, char* diag,
341 int* m, int *n, double* alpha,
342 double* a, int* ia, int* ja, int* desca,
343 double* b, int* ib, int* jb, int* descb);
344
345 void pztrsm_(char* side, char* uplo, char* trans, char* diag,
346 int* m, int *n, void* alpha,
347 void* a, int* ia, int* ja, int* desca,
348 void* b, int* ib, int* jb, int* descb);
349
350
pblas_tran(PyObject * self,PyObject * args)351 PyObject* pblas_tran(PyObject *self, PyObject *args)
352 {
353 int m, n;
354 Py_complex alpha;
355 Py_complex beta;
356 PyArrayObject *a, *c;
357 PyArrayObject *desca, *descc;
358 int conj;
359
360 if (!PyArg_ParseTuple(args, "iiDODOOOi", &m, &n, &alpha,
361 &a, &beta, &c,
362 &desca, &descc,
363 &conj))
364 return NULL;
365
366 int one = 1;
367 if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
368 pdtran_(&m, &n,
369 &(alpha.real),
370 DOUBLEP(a), &one, &one, INTP(desca),
371 &(beta.real),
372 DOUBLEP(c), &one, &one, INTP(descc));
373 else if (conj)
374 pztranc_(&m, &n,
375 &alpha,
376 (void*)PyArray_DATA(a), &one, &one, INTP(desca),
377 &beta,
378 (void*)PyArray_DATA(c), &one, &one, INTP(descc));
379 else
380 pztranu_(&m, &n,
381 &alpha,
382 (void*)PyArray_DATA(a), &one, &one, INTP(desca),
383 &beta,
384 (void*)PyArray_DATA(c), &one, &one, INTP(descc));
385 Py_RETURN_NONE;
386 }
387
pblas_gemm(PyObject * self,PyObject * args)388 PyObject* pblas_gemm(PyObject *self, PyObject *args)
389 {
390 char* transa;
391 char* transb;
392 int m, n, k;
393 Py_complex alpha;
394 Py_complex beta;
395 PyArrayObject *a, *b, *c;
396 PyArrayObject *desca, *descb, *descc;
397 int one = 1;
398
399 if (!PyArg_ParseTuple(args, "iiiDOODOOOOss", &m, &n, &k, &alpha,
400 &a, &b, &beta, &c,
401 &desca, &descb, &descc,
402 &transa, &transb)) {
403 return NULL;
404 }
405
406 // cdesc
407 // int c_ConTxt = INTP(descc)[1];
408
409 // If process not on BLACS grid, then return.
410 // if (c_ConTxt == -1) Py_RETURN_NONE;
411
412 if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
413 pdgemm_(transa, transb, &m, &n, &k,
414 &(alpha.real),
415 DOUBLEP(a), &one, &one, INTP(desca),
416 DOUBLEP(b), &one, &one, INTP(descb),
417 &(beta.real),
418 DOUBLEP(c), &one, &one, INTP(descc));
419 else
420 pzgemm_(transa, transb, &m, &n, &k,
421 &alpha,
422 (void*)COMPLEXP(a), &one, &one, INTP(desca),
423 (void*)COMPLEXP(b), &one, &one, INTP(descb),
424 &beta,
425 (void*)COMPLEXP(c), &one, &one, INTP(descc));
426
427 Py_RETURN_NONE;
428 }
429
430
pblas_hemm_symm(PyObject * self,PyObject * args)431 PyObject* pblas_hemm_symm(PyObject *self, PyObject *args)
432 {
433 char* side;
434 char* uplo;
435 int m, n;
436 Py_complex alpha;
437 Py_complex beta;
438 PyArrayObject *a, *b, *c;
439 PyArrayObject *desca, *descb, *descc;
440 int hemm;
441 int one = 1;
442 if (!PyArg_ParseTuple(args, "ssiiDOODOOOOi",
443 &side, &uplo, &n, &m,
444 &alpha, &a, &b, &beta,
445 &c, &desca, &descb, &descc,
446 &hemm)) {
447 return NULL;
448 }
449
450 if (PyArray_DESCR(c)->type_num == NPY_DOUBLE) {
451 pdsymm_(side, uplo, &n, &m, &(alpha.real),
452 (void*)DOUBLEP(a), &one, &one, INTP(desca),
453 (void*)DOUBLEP(b), &one, &one, INTP(descb),
454 &(beta.real),
455 (void*)DOUBLEP(c), &one, &one, INTP(descc));
456 } else if (hemm) {
457 pzhemm_(side, uplo, &n, &m, &alpha,
458 (void*)COMPLEXP(a), &one, &one, INTP(desca),
459 (void*)COMPLEXP(b), &one, &one, INTP(descb),
460 &beta,
461 (void*)COMPLEXP(c), &one, &one, INTP(descc));
462 } else {
463 pzsymm_(side, uplo, &n, &m, &alpha,
464 (void*)COMPLEXP(a), &one, &one, INTP(desca),
465 (void*)COMPLEXP(b), &one, &one, INTP(descb),
466 &beta,
467 (void*)COMPLEXP(c), &one, &one, INTP(descc));
468 }
469
470 Py_RETURN_NONE;
471 }
472
pblas_gemv(PyObject * self,PyObject * args)473 PyObject* pblas_gemv(PyObject *self, PyObject *args)
474 {
475 char* transa;
476 int m, n;
477 Py_complex alpha;
478 Py_complex beta;
479 PyArrayObject *a, *x, *y;
480 int incx = 1, incy = 1; // what should these be?
481 PyArrayObject *desca, *descx, *descy;
482 int one = 1;
483 if (!PyArg_ParseTuple(args, "iiDOODOOOOs",
484 &m, &n, &alpha,
485 &a, &x, &beta, &y,
486 &desca, &descx,
487 &descy, &transa)) {
488 return NULL;
489 }
490
491 // ydesc
492 // int y_ConTxt = INTP(descy)[1];
493
494 // If process not on BLACS grid, then return.
495 // if (y_ConTxt == -1) Py_RETURN_NONE;
496
497 if (PyArray_DESCR(y)->type_num == NPY_DOUBLE)
498 pdgemv_(transa, &m, &n,
499 &(alpha.real),
500 DOUBLEP(a), &one, &one, INTP(desca),
501 DOUBLEP(x), &one, &one, INTP(descx), &incx,
502 &(beta.real),
503 DOUBLEP(y), &one, &one, INTP(descy), &incy);
504 else
505 pzgemv_(transa, &m, &n,
506 &alpha,
507 (void*)COMPLEXP(a), &one, &one, INTP(desca),
508 (void*)COMPLEXP(x), &one, &one, INTP(descx), &incx,
509 &beta,
510 (void*)COMPLEXP(y), &one, &one, INTP(descy), &incy);
511
512 Py_RETURN_NONE;
513 }
514
pblas_r2k(PyObject * self,PyObject * args)515 PyObject* pblas_r2k(PyObject *self, PyObject *args)
516 {
517 char* uplo;
518 int n, k;
519 Py_complex alpha;
520 Py_complex beta;
521 PyArrayObject *a, *b, *c;
522 PyArrayObject *desca, *descb, *descc;
523 int one = 1;
524
525 if (!PyArg_ParseTuple(args, "iiDOODOOOOs", &n, &k, &alpha,
526 &a, &b, &beta, &c,
527 &desca, &descb, &descc,
528 &uplo)) {
529 return NULL;
530 }
531
532 // cdesc
533 // int c_ConTxt = INTP(descc)[1];
534
535 // If process not on BLACS grid, then return.
536 // if (c_ConTxt == -1) Py_RETURN_NONE;
537
538 if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
539 pdsyr2k_(uplo, "T", &n, &k,
540 &(alpha.real),
541 DOUBLEP(a), &one, &one, INTP(desca),
542 DOUBLEP(b), &one, &one, INTP(descb),
543 &(beta.real),
544 DOUBLEP(c), &one, &one, INTP(descc));
545 else
546 pzher2k_(uplo, "C", &n, &k,
547 &alpha,
548 (void*)COMPLEXP(a), &one, &one, INTP(desca),
549 (void*)COMPLEXP(b), &one, &one, INTP(descb),
550 &beta,
551 (void*)COMPLEXP(c), &one, &one, INTP(descc));
552
553 Py_RETURN_NONE;
554 }
555
pblas_rk(PyObject * self,PyObject * args)556 PyObject* pblas_rk(PyObject *self, PyObject *args)
557 {
558 char* uplo;
559 int n, k;
560 Py_complex alpha;
561 Py_complex beta;
562 PyArrayObject *a, *c;
563 PyArrayObject *desca, *descc;
564 int one = 1;
565
566 if (!PyArg_ParseTuple(args, "iiDODOOOs", &n, &k, &alpha,
567 &a, &beta, &c,
568 &desca, &descc,
569 &uplo)) {
570 return NULL;
571 }
572
573 // cdesc
574 // int c_ConTxt = INTP(descc)[1];
575
576 // If process not on BLACS grid, then return.
577 // if (c_ConTxt == -1) Py_RETURN_NONE;
578
579 if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
580 pdsyrk_(uplo, "T", &n, &k,
581 &(alpha.real),
582 DOUBLEP(a), &one, &one, INTP(desca),
583 &(beta.real),
584 DOUBLEP(c), &one, &one, INTP(descc));
585 else
586 pzherk_(uplo, "C", &n, &k,
587 &alpha,
588 (void*)COMPLEXP(a), &one, &one, INTP(desca),
589 &beta,
590 (void*)COMPLEXP(c), &one, &one, INTP(descc));
591
592 Py_RETURN_NONE;
593 }
594
new_blacs_context(PyObject * self,PyObject * args)595 PyObject* new_blacs_context(PyObject *self, PyObject *args)
596 {
597 PyObject* comm_obj;
598 int nprow, npcol;
599
600 int iam, nprocs;
601 int ConTxt;
602 char* order;
603
604 if (!PyArg_ParseTuple(args, "Oiis", &comm_obj, &nprow, &npcol, &order)){
605 return NULL;
606 }
607
608 // Create blacs grid on this communicator
609 MPI_Comm comm = ((MPIObject*)comm_obj)->comm;
610
611 // Get my id and nprocs. This is for debugging purposes only
612 Cblacs_pinfo_(&iam, &nprocs);
613 MPI_Comm_size(comm, &nprocs);
614
615 // Create blacs grid on this communicator continued
616 ConTxt = Csys2blacs_handle_(comm);
617 Cblacs_gridinit_(&ConTxt, order, nprow, npcol);
618 PyObject* returnvalue = Py_BuildValue("i", ConTxt);
619 return returnvalue;
620 }
621
get_blacs_gridinfo(PyObject * self,PyObject * args)622 PyObject* get_blacs_gridinfo(PyObject *self, PyObject *args)
623 {
624 int ConTxt, nprow, npcol;
625 int myrow, mycol;
626
627 if (!PyArg_ParseTuple(args, "iii", &ConTxt, &nprow, &npcol)) {
628 return NULL;
629 }
630
631 Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
632 return Py_BuildValue("(ii)", myrow, mycol);
633 }
634
635
get_blacs_local_shape(PyObject * self,PyObject * args)636 PyObject* get_blacs_local_shape(PyObject *self, PyObject *args)
637 {
638 int ConTxt;
639 int m, n, mb, nb, rsrc, csrc;
640 int nprow, npcol, myrow, mycol;
641 int locM, locN;
642
643 if (!PyArg_ParseTuple(args, "iiiiiii", &ConTxt, &m, &n, &mb,
644 &nb, &rsrc, &csrc)){
645 return NULL;
646 }
647
648 Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
649 locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow);
650 locN = numroc_(&n, &nb, &mycol, &csrc, &npcol);
651 return Py_BuildValue("(ii)", locM, locN);
652 }
653
blacs_destroy(PyObject * self,PyObject * args)654 PyObject* blacs_destroy(PyObject *self, PyObject *args)
655 {
656 int ConTxt;
657 if (!PyArg_ParseTuple(args, "i", &ConTxt))
658 return NULL;
659
660 Cblacs_gridexit_(ConTxt);
661
662 Py_RETURN_NONE;
663 }
664
scalapack_set(PyObject * self,PyObject * args)665 PyObject* scalapack_set(PyObject *self, PyObject *args)
666 {
667 PyArrayObject* a; // matrix;
668 PyArrayObject* desca; // descriptor
669 Py_complex alpha;
670 Py_complex beta;
671 int m, n;
672 int ia, ja;
673 char* uplo;
674
675 if (!PyArg_ParseTuple(args, "OODDsiiii", &a, &desca,
676 &alpha, &beta, &uplo,
677 &m, &n, &ia, &ja))
678 return NULL;
679
680 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
681 pdlaset_(uplo, &m, &n, &(alpha.real), &(beta.real), DOUBLEP(a),
682 &ia, &ja, INTP(desca));
683 else
684 pzlaset_(uplo, &m, &n, &alpha, &beta, (void*)COMPLEXP(a),
685 &ia, &ja, INTP(desca));
686
687 Py_RETURN_NONE;
688 }
689
scalapack_redist(PyObject * self,PyObject * args)690 PyObject* scalapack_redist(PyObject *self, PyObject *args)
691 {
692 PyArrayObject* a; // source matrix
693 PyArrayObject* b; // destination matrix
694 PyArrayObject* desca; // source descriptor
695 PyArrayObject* descb; // destination descriptor
696
697 char* uplo;
698 char diag='N'; // copy the diagonal
699 int c_ConTxt;
700 int m;
701 int n;
702
703 int ia, ja, ib, jb;
704
705 if (!PyArg_ParseTuple(args, "OOOOiiiiiiis",
706 &desca, &descb,
707 &a, &b,
708 &m, &n,
709 &ia, &ja,
710 &ib, &jb,
711 &c_ConTxt,
712 &uplo))
713 return NULL;
714
715 if (*uplo == 'G') // General matrix
716 {
717 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
718 Cpdgemr2d_(m, n,
719 DOUBLEP(a), ia, ja, INTP(desca),
720 DOUBLEP(b), ib, jb, INTP(descb),
721 c_ConTxt);
722 else
723 Cpzgemr2d_(m, n,
724 (void*)COMPLEXP(a), ia, ja, INTP(desca),
725 (void*)COMPLEXP(b), ib, jb, INTP(descb),
726 c_ConTxt);
727 }
728 else // Trapezoidal matrix
729 {
730 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
731 Cpdtrmr2d_(uplo, &diag, m, n,
732 DOUBLEP(a), ia, ja, INTP(desca),
733 DOUBLEP(b), ib, jb, INTP(descb),
734 c_ConTxt);
735 else
736 Cpztrmr2d_(uplo, &diag, m, n,
737 (void*)COMPLEXP(a), ia, ja, INTP(desca),
738 (void*)COMPLEXP(b), ib, jb, INTP(descb),
739 c_ConTxt);
740 }
741
742 Py_RETURN_NONE;
743 }
744
scalapack_diagonalize_dc(PyObject * self,PyObject * args)745 PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args)
746 {
747 // Standard driver for divide and conquer algorithm
748 // Computes all eigenvalues and eigenvectors
749
750 PyArrayObject* a; // symmetric matrix
751 PyArrayObject* desca; // symmetric matrix description vector
752 PyArrayObject* z; // eigenvector matrix
753 PyArrayObject* w; // eigenvalue array
754 int one = 1;
755
756 char jobz = 'V'; // eigenvectors also
757 char* uplo;
758
759 if (!PyArg_ParseTuple(args, "OOsOO", &a, &desca, &uplo, &z, &w))
760 return NULL;
761
762 // adesc
763 // int a_ConTxt = INTP(desca)[1];
764 int a_m = INTP(desca)[2];
765 int a_n = INTP(desca)[3];
766
767 // zdesc = adesc; this can be relaxed a bit according to pdsyevd.f
768
769 // Only square matrices
770 assert (a_m == a_n);
771 int n = a_n;
772
773 // If process not on BLACS grid, then return.
774 // if (a_ConTxt == -1) Py_RETURN_NONE;
775
776 // Query part, need to find the optimal size of a number of work arrays
777 int info;
778 int querywork = -1;
779 int* iwork;
780 int liwork;
781 int lwork;
782 int lrwork;
783 int i_work;
784 double d_work;
785 double_complex c_work;
786
787 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
788 {
789 pdsyevd_(&jobz, uplo, &n,
790 DOUBLEP(a), &one, &one, INTP(desca),
791 DOUBLEP(w),
792 DOUBLEP(z), &one, &one, INTP(desca),
793 &d_work, &querywork, &i_work, &querywork, &info);
794 lwork = (int)(d_work);
795 // Sometimes lwork is not large enough. Found this formula on
796 // the internet:
797 lwork = MAX(131072, 2 * (int) lwork + 1);
798 }
799 else
800 {
801 pzheevd_(&jobz, uplo, &n,
802 (void*)COMPLEXP(a), &one, &one, INTP(desca),
803 DOUBLEP(w),
804 (void*)COMPLEXP(z), &one, &one, INTP(desca),
805 (void*)&c_work, &querywork, &d_work, &querywork,
806 &i_work, &querywork, &info);
807 lwork = (int)(c_work);
808 lrwork = (int)(d_work);
809 }
810
811 if (info != 0)
812 {
813 PyErr_SetString(PyExc_RuntimeError,
814 "scalapack_diagonalize_dc error in query.");
815 return NULL;
816 }
817
818 // Computation part
819 liwork = MAX(8 * n, i_work + 1);
820 iwork = GPAW_MALLOC(int, liwork);
821 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
822 {
823 double* work = GPAW_MALLOC(double, lwork);
824 pdsyevd_(&jobz, uplo, &n,
825 DOUBLEP(a), &one, &one, INTP(desca),
826 DOUBLEP(w),
827 DOUBLEP(z), &one, &one, INTP(desca),
828 work, &lwork, iwork, &liwork, &info);
829 free(work);
830 }
831 else
832 {
833 double_complex *work = GPAW_MALLOC(double_complex, lwork);
834 double* rwork = GPAW_MALLOC(double, lrwork);
835 pzheevd_(&jobz, uplo, &n,
836 (void*)COMPLEXP(a), &one, &one, INTP(desca),
837 DOUBLEP(w),
838 (void*)COMPLEXP(z), &one, &one, INTP(desca),
839 (void*)work, &lwork, rwork, &lrwork,
840 iwork, &liwork, &info);
841 free(rwork);
842 free(work);
843 }
844 free(iwork);
845
846 PyObject* returnvalue = Py_BuildValue("i", info);
847 return returnvalue;
848 }
849
scalapack_diagonalize_ex(PyObject * self,PyObject * args)850 PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args)
851 {
852 // Standard driver for bisection and inverse iteration algorithm
853 // Computes 'iu' eigenvalues and eigenvectors
854
855 PyArrayObject* a; // Hamiltonian matrix
856 PyArrayObject* desca; // Hamintonian matrix descriptor
857 PyArrayObject* z; // eigenvector matrix
858 PyArrayObject* w; // eigenvalue array
859 int a_mycol = -1;
860 int a_myrow = -1;
861 int a_nprow, a_npcol;
862 int il = 1; // not used when range = 'A' or 'V'
863 int iu;
864 int eigvalm, nz;
865 int one = 1;
866
867 double vl, vu; // not used when range = 'A' or 'I'
868
869 char jobz = 'V'; // eigenvectors also
870 char range = 'I'; // eigenvalues il-th through iu-th
871 char* uplo;
872
873 if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu,
874 &z, &w))
875 return NULL;
876
877 // a desc
878 int a_ConTxt = INTP(desca)[1];
879 int a_m = INTP(desca)[2];
880 int a_n = INTP(desca)[3];
881
882 // Only square matrices
883 assert (a_m == a_n);
884 int n = a_n;
885
886 // zdesc = adesc = bdesc; required by pdsyevx.f
887
888 // If process not on BLACS grid, then return.
889 // if (a_ConTxt == -1) Py_RETURN_NONE;
890
891 Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
892
893 // Convergence tolerance
894 double abstol = 1.0e-8;
895 // char cmach = 'U'; // most orthogonal eigenvectors
896 // char cmach = 'S'; // most acccurate eigenvalues
897 // double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors
898 // double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
899
900 double orfac = -1.0;
901
902 // Query part, need to find the optimal size of a number of work arrays
903 int info;
904 int *ifail;
905 ifail = GPAW_MALLOC(int, n);
906 int *iclustr;
907 iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
908 double *gap;
909 gap = GPAW_MALLOC(double, a_nprow*a_npcol);
910 int querywork = -1;
911 int* iwork;
912 int liwork;
913 int lwork; // workspace size must be at least 3
914 int lrwork; // workspace size must be at least 3
915 int i_work;
916 double d_work[3];
917 double_complex c_work;
918
919 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
920 {
921 pdsyevx_(&jobz, &range, uplo, &n,
922 DOUBLEP(a), &one, &one, INTP(desca),
923 &vl, &vu, &il, &iu, &abstol, &eigvalm,
924 &nz, DOUBLEP(w), &orfac,
925 DOUBLEP(z), &one, &one, INTP(desca),
926 d_work, &querywork, &i_work, &querywork,
927 ifail, iclustr, gap, &info);
928 lwork = MAX(3, (int)(d_work[0]));
929 }
930 else
931 {
932 pzheevx_(&jobz, &range, uplo, &n,
933 (void*)COMPLEXP(a), &one, &one, INTP(desca),
934 &vl, &vu, &il, &iu, &abstol, &eigvalm,
935 &nz, DOUBLEP(w), &orfac,
936 (void*)COMPLEXP(z), &one, &one, INTP(desca),
937 (void*)&c_work, &querywork, d_work, &querywork,
938 &i_work, &querywork,
939 ifail, iclustr, gap, &info);
940 lwork = MAX(3, (int)(c_work));
941 lrwork = MAX(3, (int)(d_work[0]));
942 }
943
944 if (info != 0) {
945 printf ("info = %d", info);
946 PyErr_SetString(PyExc_RuntimeError,
947 "scalapack_diagonalize_ex error in query.");
948 return NULL;
949 }
950
951 // Computation part
952 // lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
953 liwork = i_work;
954 iwork = GPAW_MALLOC(int, liwork);
955
956 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
957 {
958 double* work = GPAW_MALLOC(double, lwork);
959 pdsyevx_(&jobz, &range, uplo, &n,
960 DOUBLEP(a), &one, &one, INTP(desca),
961 &vl, &vu, &il, &iu, &abstol, &eigvalm,
962 &nz, DOUBLEP(w), &orfac,
963 DOUBLEP(z), &one, &one, INTP(desca),
964 work, &lwork, iwork, &liwork,
965 ifail, iclustr, gap, &info);
966 free(work);
967 }
968 else
969 {
970 double_complex* work = GPAW_MALLOC(double_complex, lwork);
971 double* rwork = GPAW_MALLOC(double, lrwork);
972 pzheevx_(&jobz, &range, uplo, &n,
973 (void*)COMPLEXP(a), &one, &one, INTP(desca),
974 &vl, &vu, &il, &iu, &abstol, &eigvalm,
975 &nz, DOUBLEP(w), &orfac,
976 (void*)COMPLEXP(z), &one, &one, INTP(desca),
977 (void*)work, &lwork, rwork, &lrwork,
978 iwork, &liwork,
979 ifail, iclustr, gap, &info);
980 free(rwork);
981 free(work);
982 }
983 free(iwork);
984 free(gap);
985 free(iclustr);
986 free(ifail);
987
988 // If this fails, fewer eigenvalues than requested were computed.
989 assert (eigvalm == iu);
990 PyObject* returnvalue = Py_BuildValue("i", info);
991 return returnvalue;
992 }
993
994 #ifdef GPAW_MR3
scalapack_diagonalize_mr3(PyObject * self,PyObject * args)995 PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args)
996 {
997 // Standard driver for MRRR algorithm
998 // Computes 'iu' eigenvalues and eigenvectors
999 // http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
1000
1001 PyArrayObject* a; // Hamiltonian matrix
1002 PyArrayObject* desca; // Hamintonian matrix descriptor
1003 PyArrayObject* z; // eigenvector matrix
1004 PyArrayObject* w; // eigenvalue array
1005 int il = 1; // not used when range = 'A' or 'V'
1006 int iu;
1007 int eigvalm, nz;
1008 int one = 1;
1009
1010 double vl, vu; // not used when range = 'A' or 'I'
1011
1012 char jobz = 'V'; // eigenvectors also
1013 char range = 'I'; // eigenvalues il-th through iu-th
1014 char* uplo;
1015
1016 if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu,
1017 &z, &w))
1018 return NULL;
1019
1020 // a desc
1021 // int a_ConTxt = INTP(desca)[1];
1022 int a_m = INTP(desca)[2];
1023 int a_n = INTP(desca)[3];
1024
1025 // Only square matrices
1026 assert (a_m == a_n);
1027 int n = a_n;
1028
1029 // zdesc = adesc = bdesc; required by pdsyevx.f
1030
1031 // If process not on BLACS grid, then return.
1032 // if (a_ConTxt == -1) Py_RETURN_NONE;
1033
1034 // Query part, need to find the optimal size of a number of work arrays
1035 int info;
1036 int querywork = -1;
1037 int* iwork;
1038 int liwork;
1039 int lwork;
1040 int lrwork;
1041 int i_work;
1042 double d_work[3];
1043 double_complex c_work;
1044
1045 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1046 {
1047 pdsyevr_(&jobz, &range, uplo, &n,
1048 DOUBLEP(a), &one, &one, INTP(desca),
1049 &vl, &vu, &il, &iu, &eigvalm,
1050 &nz, DOUBLEP(w),
1051 DOUBLEP(z), &one, &one, INTP(desca),
1052 d_work, &querywork, &i_work, &querywork,
1053 &info);
1054 lwork = (int)(d_work[0]);
1055 }
1056 else
1057 {
1058 pzheevr_(&jobz, &range, uplo, &n,
1059 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1060 &vl, &vu, &il, &iu, &eigvalm,
1061 &nz, DOUBLEP(w),
1062 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1063 (void*)&c_work, &querywork, d_work, &querywork,
1064 &i_work, &querywork,
1065 &info);
1066 lwork = (int)(c_work);
1067 lrwork = (int)(d_work[0]);
1068 }
1069
1070 if (info != 0) {
1071 printf ("info = %d", info);
1072 PyErr_SetString(PyExc_RuntimeError,
1073 "scalapack_diagonalize_evr error in query.");
1074 return NULL;
1075 }
1076
1077 // Computation part
1078 liwork = i_work;
1079 iwork = GPAW_MALLOC(int, liwork);
1080
1081 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1082 {
1083 double* work = GPAW_MALLOC(double, lwork);
1084 pdsyevr_(&jobz, &range, uplo, &n,
1085 DOUBLEP(a), &one, &one, INTP(desca),
1086 &vl, &vu, &il, &iu, &eigvalm,
1087 &nz, DOUBLEP(w),
1088 DOUBLEP(z), &one, &one, INTP(desca),
1089 work, &lwork, iwork, &liwork,
1090 &info);
1091 free(work);
1092 }
1093 else
1094 {
1095 double_complex* work = GPAW_MALLOC(double_complex, lwork);
1096 double* rwork = GPAW_MALLOC(double, lrwork);
1097 pzheevr_(&jobz, &range, uplo, &n,
1098 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1099 &vl, &vu, &il, &iu, &eigvalm,
1100 &nz, DOUBLEP(w),
1101 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1102 (void*)work, &lwork, rwork, &lrwork,
1103 iwork, &liwork,
1104 &info);
1105 free(rwork);
1106 free(work);
1107 }
1108 free(iwork);
1109
1110 // If this fails, fewer eigenvalues than requested were computed.
1111 assert (eigvalm == iu);
1112 PyObject* returnvalue = Py_BuildValue("i", info);
1113 return returnvalue;
1114 }
1115 #endif
1116
scalapack_general_diagonalize_dc(PyObject * self,PyObject * args)1117 PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args)
1118 {
1119 // General driver for divide and conquer algorithm
1120 // Computes *all* eigenvalues and eigenvectors
1121
1122 PyArrayObject* a; // Hamiltonian matrix
1123 PyArrayObject* b; // overlap matrix
1124 PyArrayObject* desca; // Hamintonian matrix descriptor
1125 PyArrayObject* z; // eigenvector matrix
1126 PyArrayObject* w; // eigenvalue array
1127 int ibtype = 1; // Solve H*psi = lambda*S*psi
1128 int one = 1;
1129
1130 char jobz = 'V'; // eigenvectors also
1131 char* uplo;
1132
1133 double scale;
1134
1135 if (!PyArg_ParseTuple(args, "OOsOOO", &a, &desca, &uplo,
1136 &b, &z, &w))
1137 return NULL;
1138
1139 // a desc
1140 // int a_ConTxt = INTP(desca)[1];
1141 int a_m = INTP(desca)[2];
1142 int a_n = INTP(desca)[3];
1143
1144 // Only square matrices
1145 assert (a_m == a_n);
1146 int n = a_n;
1147
1148 // zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
1149
1150 // If process not on BLACS grid, then return.
1151 // if (a_ConTxt == -1) Py_RETURN_NONE;
1152
1153 // Cholesky Decomposition
1154 int info;
1155 if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
1156 pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
1157 else
1158 pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
1159
1160 if (info != 0)
1161 {
1162 PyErr_SetString(PyExc_RuntimeError,
1163 "scalapack_general_diagonalize_dc error in Cholesky.");
1164 return NULL;
1165 }
1166
1167 // Query variables
1168 int querywork = -1;
1169 int* iwork;
1170 int liwork;
1171 int lwork;
1172 int lrwork;
1173 int i_work;
1174 double d_work;
1175 double_complex c_work;
1176 // NGST Query
1177 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1178 {
1179 pdsyngst_(&ibtype, uplo, &n,
1180 DOUBLEP(a), &one, &one, INTP(desca),
1181 DOUBLEP(b), &one, &one, INTP(desca),
1182 &scale, &d_work, &querywork, &info);
1183 lwork = (int)(d_work);
1184 }
1185 else
1186 {
1187 pzhengst_(&ibtype, uplo, &n,
1188 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1189 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1190 &scale, (void*)&c_work, &querywork, &info);
1191 lwork = (int)(c_work);
1192 }
1193
1194 if (info != 0) {
1195 PyErr_SetString(PyExc_RuntimeError,
1196 "scalapack_general_diagonalize_dc error in NGST query.");
1197 return NULL;
1198 }
1199 // NGST Compute
1200 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1201 {
1202 double* work = GPAW_MALLOC(double, lwork);
1203 pdsyngst_(&ibtype, uplo, &n,
1204 DOUBLEP(a), &one, &one, INTP(desca),
1205 DOUBLEP(b), &one, &one, INTP(desca),
1206 &scale, work, &lwork, &info);
1207 free(work);
1208 }
1209 else
1210 {
1211 double_complex* work = GPAW_MALLOC(double_complex, lwork);
1212 pzhengst_(&ibtype, uplo, &n,
1213 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1214 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1215 &scale, (void*)work, &lwork, &info);
1216 free(work);
1217 }
1218
1219 if (info != 0) {
1220 PyErr_SetString(PyExc_RuntimeError,
1221 "scalapack_general_diagonalize_dc error in NGST compute.");
1222 return NULL;
1223 }
1224
1225 // NOTE: Scale is always equal to 1.0 above. In future version of ScaLAPACK, we
1226 // may need to rescale eigenvalues by scale. This can be accomplised by using
1227 // the BLAS1 d/zscal. See pdsygvx.f
1228
1229 // EVD Query
1230 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1231 {
1232 pdsyevd_(&jobz, uplo, &n,
1233 DOUBLEP(a), &one, &one, INTP(desca),
1234 DOUBLEP(w),
1235 DOUBLEP(z), &one, &one, INTP(desca),
1236 &d_work, &querywork, &i_work, &querywork, &info);
1237 lwork = (int)(d_work);
1238 }
1239 else
1240 {
1241 pzheevd_(&jobz, uplo, &n,
1242 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1243 DOUBLEP(w),
1244 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1245 (void*)&c_work, &querywork, &d_work, &querywork,
1246 &i_work, &querywork, &info);
1247 lwork = (int)(c_work);
1248 lrwork = (int)(d_work);
1249 }
1250
1251 if (info != 0)
1252 {
1253 PyErr_SetString(PyExc_RuntimeError,
1254 "scalapack_general_diagonalize_dc error in EVD query.");
1255 return NULL;
1256 }
1257
1258 // EVD Computation
1259 liwork = i_work;
1260 iwork = GPAW_MALLOC(int, liwork);
1261 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1262 {
1263 double* work = GPAW_MALLOC(double, lwork);
1264 pdsyevd_(&jobz, uplo, &n,
1265 DOUBLEP(a), &one, &one, INTP(desca),
1266 DOUBLEP(w),
1267 DOUBLEP(z), &one, &one, INTP(desca),
1268 work, &lwork, iwork, &liwork, &info);
1269 free(work);
1270 }
1271 else
1272 {
1273 double_complex *work = GPAW_MALLOC(double_complex, lwork);
1274 double* rwork = GPAW_MALLOC(double, lrwork);
1275 pzheevd_(&jobz, uplo, &n,
1276 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1277 DOUBLEP(w),
1278 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1279 (void*)work, &lwork, rwork, &lrwork,
1280 iwork, &liwork, &info);
1281 free(rwork);
1282 free(work);
1283 }
1284 free(iwork);
1285
1286 // Backtransformation to the original problem
1287 char trans;
1288 double d_one = 1.0;
1289 double_complex c_one = 1.0;
1290
1291 if (*uplo == 'U')
1292 trans = 'N';
1293 else
1294 trans = 'T';
1295
1296 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1297 pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one,
1298 DOUBLEP(b), &one, &one, INTP(desca),
1299 DOUBLEP(z), &one, &one, INTP(desca));
1300 else
1301 pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one,
1302 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1303 (void*)COMPLEXP(z), &one, &one, INTP(desca));
1304
1305 PyObject* returnvalue = Py_BuildValue("i", info);
1306 return returnvalue;
1307 }
1308
scalapack_general_diagonalize_ex(PyObject * self,PyObject * args)1309 PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args)
1310 {
1311 // General driver for bisection and inverse iteration algorithm
1312 // Computes 'iu' eigenvalues and eigenvectors
1313
1314 PyArrayObject* a; // Hamiltonian matrix
1315 PyArrayObject* b; // overlap matrix
1316 PyArrayObject* desca; // Hamintonian matrix descriptor
1317 PyArrayObject* z; // eigenvector matrix
1318 PyArrayObject* w; // eigenvalue array
1319 int ibtype = 1; // Solve H*psi = lambda*S*psi
1320 int a_mycol = -1;
1321 int a_myrow = -1;
1322 int a_nprow, a_npcol;
1323 int il = 1; // not used when range = 'A' or 'V'
1324 int iu; //
1325 int eigvalm, nz;
1326 int one = 1;
1327
1328 double vl, vu; // not used when range = 'A' or 'I'
1329
1330 char jobz = 'V'; // eigenvectors also
1331 char range = 'I'; // eigenvalues il-th through iu-th
1332 char* uplo;
1333
1334 if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu,
1335 &b, &z, &w))
1336 return NULL;
1337
1338 // a desc
1339 int a_ConTxt = INTP(desca)[1];
1340 int a_m = INTP(desca)[2];
1341 int a_n = INTP(desca)[3];
1342
1343 // Only square matrices
1344 assert (a_m == a_n);
1345 int n = a_n;
1346
1347 // zdesc = adesc = bdesc; required by pdsygvx.f
1348
1349 // If process not on BLACS grid, then return.
1350 // if (a_ConTxt == -1) Py_RETURN_NONE;
1351
1352 Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
1353
1354 // Convergence tolerance
1355 double abstol = 1.0e-8;
1356 // char cmach = 'U'; // most orthogonal eigenvectors
1357 // char cmach = 'S'; // most acccurate eigenvalues
1358 // double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors
1359 // double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
1360
1361 double orfac = -1.0;
1362
1363 // Query part, need to find the optimal size of a number of work arrays
1364 int info;
1365 int *ifail;
1366 ifail = GPAW_MALLOC(int, n);
1367 int *iclustr;
1368 iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
1369 double *gap;
1370 gap = GPAW_MALLOC(double, a_nprow*a_npcol);
1371 int querywork = -1;
1372 int* iwork;
1373 int liwork;
1374 int lwork; // workspace size must be at least 3
1375 int lrwork; // workspace size must be at least 3
1376 int i_work;
1377 double d_work[3];
1378 double_complex c_work;
1379
1380 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1381 {
1382 pdsygvx_(&ibtype, &jobz, &range, uplo, &n,
1383 DOUBLEP(a), &one, &one, INTP(desca),
1384 DOUBLEP(b), &one, &one, INTP(desca),
1385 &vl, &vu, &il, &iu, &abstol, &eigvalm,
1386 &nz, DOUBLEP(w), &orfac,
1387 DOUBLEP(z), &one, &one, INTP(desca),
1388 d_work, &querywork, &i_work, &querywork,
1389 ifail, iclustr, gap, &info);
1390 lwork = MAX(3, (int)(d_work[0]));
1391 }
1392 else
1393 {
1394 pzhegvx_(&ibtype, &jobz, &range, uplo, &n,
1395 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1396 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1397 &vl, &vu, &il, &iu, &abstol, &eigvalm,
1398 &nz, DOUBLEP(w), &orfac,
1399 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1400 (void*)&c_work, &querywork, d_work, &querywork,
1401 &i_work, &querywork,
1402 ifail, iclustr, gap, &info);
1403 lwork = MAX(3, (int)(c_work));
1404 lrwork = MAX(3, (int)(d_work[0]));
1405 }
1406 if (info != 0) {
1407 PyErr_SetString(PyExc_RuntimeError,
1408 "scalapack_general_diagonalize_ex error in query.");
1409 return NULL;
1410 }
1411
1412 // Computation part
1413 // lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
1414 liwork = i_work;
1415 iwork = GPAW_MALLOC(int, liwork);
1416 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1417 {
1418 double* work = GPAW_MALLOC(double, lwork);
1419 pdsygvx_(&ibtype, &jobz, &range, uplo, &n,
1420 DOUBLEP(a), &one, &one, INTP(desca),
1421 DOUBLEP(b), &one, &one, INTP(desca),
1422 &vl, &vu, &il, &iu, &abstol, &eigvalm,
1423 &nz, DOUBLEP(w), &orfac,
1424 DOUBLEP(z), &one, &one, INTP(desca),
1425 work, &lwork, iwork, &liwork,
1426 ifail, iclustr, gap, &info);
1427 free(work);
1428 }
1429 else
1430 {
1431 double_complex* work = GPAW_MALLOC(double_complex, lwork);
1432 double* rwork = GPAW_MALLOC(double, lrwork);
1433 pzhegvx_(&ibtype, &jobz, &range, uplo, &n,
1434 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1435 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1436 &vl, &vu, &il, &iu, &abstol, &eigvalm,
1437 &nz, DOUBLEP(w), &orfac,
1438 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1439 (void*)work, &lwork, rwork, &lrwork,
1440 iwork, &liwork,
1441 ifail, iclustr, gap, &info);
1442 free(rwork);
1443 free(work);
1444 }
1445 free(iwork);
1446 free(gap);
1447 free(iclustr);
1448 free(ifail);
1449
1450 // If this fails, fewer eigenvalues than requested were computed.
1451 assert (eigvalm == iu);
1452 PyObject* returnvalue = Py_BuildValue("i", info);
1453 return returnvalue;
1454 }
1455
1456 #ifdef GPAW_MR3
scalapack_general_diagonalize_mr3(PyObject * self,PyObject * args)1457 PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args)
1458 {
1459 // General driver for MRRR algorithm
1460 // Computes 'iu' eigenvalues and eigenvectors
1461 // http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
1462
1463 PyArrayObject* a; // Hamiltonian matrix
1464 PyArrayObject* b; // overlap matrix
1465 PyArrayObject* desca; // Hamintonian matrix descriptor
1466 PyArrayObject* z; // eigenvector matrix
1467 PyArrayObject* w; // eigenvalue array
1468 int ibtype = 1; // Solve H*psi = lambda*S*psi
1469 int il = 1; // not used when range = 'A' or 'V'
1470 int iu;
1471 int eigvalm, nz;
1472 int one = 1;
1473
1474 double vl, vu; // not used when range = 'A' or 'I'
1475
1476 char jobz = 'V'; // eigenvectors also
1477 char range = 'I'; // eigenvalues il-th through iu-th
1478 char* uplo;
1479
1480 double scale;
1481
1482 if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu,
1483 &b, &z, &w))
1484 return NULL;
1485
1486 // a desc
1487 // int a_ConTxt = INTP(desca)[1];
1488 int a_m = INTP(desca)[2];
1489 int a_n = INTP(desca)[3];
1490
1491 // Only square matrices
1492 assert (a_m == a_n);
1493 int n = a_n;
1494
1495 // zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
1496
1497 // If process not on BLACS grid, then return.
1498 // if (a_ConTxt == -1) Py_RETURN_NONE;
1499
1500 // Cholesky Decomposition
1501 int info;
1502 if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
1503 pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
1504 else
1505 pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
1506
1507 if (info != 0)
1508 {
1509 PyErr_SetString(PyExc_RuntimeError,
1510 "scalapack_general_diagonalize_mr3 error in Cholesky.");
1511 return NULL;
1512 }
1513
1514 // Query variables
1515 int querywork = -1;
1516 int* iwork;
1517 int liwork;
1518 int lwork;
1519 int lrwork;
1520 int i_work;
1521 double d_work[3];
1522 double_complex c_work;
1523 // NGST Query
1524 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1525 {
1526 pdsyngst_(&ibtype, uplo, &n,
1527 DOUBLEP(a), &one, &one, INTP(desca),
1528 DOUBLEP(b), &one, &one, INTP(desca),
1529 &scale, d_work, &querywork, &info);
1530 lwork = (int)(d_work[0]);
1531 }
1532 else
1533 {
1534 pzhengst_(&ibtype, uplo, &n,
1535 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1536 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1537 &scale, (void*)&c_work, &querywork, &info);
1538 lwork = (int)(c_work);
1539 }
1540
1541 if (info != 0) {
1542 PyErr_SetString(PyExc_RuntimeError,
1543 "scalapack_general_diagonalize_mr3 error in NGST query.");
1544 return NULL;
1545 }
1546
1547 // NGST Compute
1548 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1549 {
1550 double* work = GPAW_MALLOC(double, lwork);
1551 pdsyngst_(&ibtype, uplo, &n,
1552 DOUBLEP(a), &one, &one, INTP(desca),
1553 DOUBLEP(b), &one, &one, INTP(desca),
1554 &scale, work, &lwork, &info);
1555 free(work);
1556 }
1557 else
1558 {
1559 double_complex* work = GPAW_MALLOC(double_complex, lwork);
1560 pzhengst_(&ibtype, uplo, &n,
1561 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1562 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1563 &scale, (void*)work, &lwork, &info);
1564 free(work);
1565 }
1566
1567 if (info != 0) {
1568 PyErr_SetString(PyExc_RuntimeError,
1569 "scalapack_general_diagonalize_mr3 error in NGST compute.");
1570 return NULL;
1571 }
1572
1573 // NOTE: Scale is always equal to 1.0 above. In future version of ScaLAPACK, we
1574 // may need to rescale eigenvalues by scale. This can be accomplised by using
1575 // the BLAS1 d/zscal. See pdsygvx.f
1576
1577 // EVR Query
1578 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1579 {
1580 pdsyevr_(&jobz, &range, uplo, &n,
1581 DOUBLEP(a), &one, &one, INTP(desca),
1582 &vl, &vu, &il, &iu, &eigvalm,
1583 &nz, DOUBLEP(w),
1584 DOUBLEP(z), &one, &one, INTP(desca),
1585 d_work, &querywork, &i_work, &querywork,
1586 &info);
1587 lwork = (int)(d_work[0]);
1588 }
1589 else
1590 {
1591 pzheevr_(&jobz, &range, uplo, &n,
1592 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1593 &vl, &vu, &il, &iu, &eigvalm,
1594 &nz, DOUBLEP(w),
1595 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1596 (void*)&c_work, &querywork, d_work, &querywork,
1597 &i_work, &querywork,
1598 &info);
1599 lwork = (int)(c_work);
1600 lrwork = (int)(d_work[0]);
1601 }
1602
1603 if (info != 0) {
1604 printf ("info = %d", info);
1605 PyErr_SetString(PyExc_RuntimeError,
1606 "scalapack_general_diagonalize_evr error in query.");
1607 return NULL;
1608 }
1609
1610 // EVR Computation
1611 liwork = i_work;
1612 iwork = GPAW_MALLOC(int, liwork);
1613 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1614 {
1615 double* work = GPAW_MALLOC(double, lwork);
1616 pdsyevr_(&jobz, &range, uplo, &n,
1617 DOUBLEP(a), &one, &one, INTP(desca),
1618 &vl, &vu, &il, &iu, &eigvalm,
1619 &nz, DOUBLEP(w),
1620 DOUBLEP(z), &one, &one, INTP(desca),
1621 work, &lwork, iwork, &liwork,
1622 &info);
1623 free(work);
1624 }
1625 else
1626 {
1627 double_complex* work = GPAW_MALLOC(double_complex, lwork);
1628 double* rwork = GPAW_MALLOC(double, lrwork);
1629 pzheevr_(&jobz, &range, uplo, &n,
1630 (void*)COMPLEXP(a), &one, &one, INTP(desca),
1631 &vl, &vu, &il, &iu, &eigvalm,
1632 &nz, DOUBLEP(w),
1633 (void*)COMPLEXP(z), &one, &one, INTP(desca),
1634 (void*)work, &lwork, rwork, &lrwork,
1635 iwork, &liwork,
1636 &info);
1637 free(rwork);
1638 free(work);
1639 }
1640 free(iwork);
1641
1642 // Backtransformation to the original problem
1643 char trans;
1644 double d_one = 1.0;
1645 double_complex c_one = 1.0;
1646
1647 if (*uplo == 'U')
1648 trans = 'N';
1649 else
1650 trans = 'T';
1651
1652 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1653 pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one,
1654 DOUBLEP(b), &one, &one, INTP(desca),
1655 DOUBLEP(z), &one, &one, INTP(desca));
1656 else
1657 pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one,
1658 (void*)COMPLEXP(b), &one, &one, INTP(desca),
1659 (void*)COMPLEXP(z), &one, &one, INTP(desca));
1660
1661 // If this fails, fewer eigenvalues than requested were computed.
1662 assert (eigvalm == iu);
1663 PyObject* returnvalue = Py_BuildValue("i", info);
1664 return returnvalue;
1665 }
1666 #endif
1667
scalapack_inverse_cholesky(PyObject * self,PyObject * args)1668 PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args)
1669 {
1670 // Cholesky plus inverse of triangular matrix
1671
1672 PyArrayObject* a; // overlap matrix
1673 PyArrayObject* desca; // symmetric matrix description vector
1674 int info;
1675 double d_zero = 0.0;
1676 double_complex c_zero = 0.0;
1677 int one = 1;
1678 int two = 2;
1679
1680 char diag = 'N'; // non-unit triangular
1681 char* uplo;
1682
1683 if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
1684 return NULL;
1685
1686 // adesc
1687 // int a_ConTxt = INTP(desca)[1];
1688 int a_m = INTP(desca)[2];
1689 int a_n = INTP(desca)[3];
1690
1691 // Only square matrices
1692 assert (a_m == a_n);
1693 int n = a_n;
1694 int p = a_n - 1;
1695
1696 // If process not on BLACS grid, then return.
1697 // if (a_ConTxt == -1) Py_RETURN_NONE;
1698
1699 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1700 {
1701 pdpotrf_(uplo, &n, DOUBLEP(a), &one, &one,
1702 INTP(desca), &info);
1703 if (info == 0)
1704 {
1705 pdtrtri_(uplo, &diag, &n, DOUBLEP(a), &one, &one,
1706 INTP(desca), &info);
1707 if (*uplo == 'L')
1708 pdlaset_("U", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
1709 &one, &two, INTP(desca));
1710 else
1711 pdlaset_("L", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
1712 &two, &one, INTP(desca));
1713 }
1714 }
1715 else
1716 {
1717 pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one,
1718 INTP(desca), &info);
1719 if (info == 0)
1720 {
1721 pztrtri_(uplo, &diag, &n, (void*)COMPLEXP(a), &one, &one,
1722 INTP(desca), &info);
1723 if (*uplo == 'L')
1724 pzlaset_("U", &p, &p, (void*)&c_zero, (void*)&c_zero,
1725 (void*)COMPLEXP(a), &one, &two, INTP(desca));
1726 else
1727 pzlaset_("L", &p, &p, (void*)&c_zero, (void*)&c_zero,
1728 (void*)COMPLEXP(a), &two, &one, INTP(desca));
1729 }
1730 }
1731
1732 PyObject* returnvalue = Py_BuildValue("i", info);
1733 return returnvalue;
1734 }
1735
scalapack_inverse(PyObject * self,PyObject * args)1736 PyObject* scalapack_inverse(PyObject *self, PyObject *args)
1737 {
1738 // Inverse of an hermitean matrix
1739 PyArrayObject* a; // Matrix
1740 PyArrayObject* desca; // Matrix description vector
1741 char* uplo;
1742 int info;
1743 int one = 1;
1744 if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
1745 return NULL;
1746
1747 int a_m = INTP(desca)[2];
1748 int a_n = INTP(desca)[3];
1749 // Only square matrices
1750 assert (a_m == a_n);
1751
1752 int n = a_n;
1753
1754 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1755 {
1756 assert(1==-1); // No double version implemented
1757 }
1758 else
1759 {
1760 pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
1761 if (info == 0)
1762 {
1763 pzpotri_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
1764 }
1765 }
1766 PyObject* returnvalue = Py_BuildValue("i", info);
1767 return returnvalue;
1768 }
1769
1770 /*
1771 PyObject* scalapack_solve(PyObject *self, PyObject *args)
1772 {
1773 // Solves equation Ax = B, where A is a general matrix
1774 PyArrayObject* a; // Matrix
1775 PyArrayObject* desca; // Matrix description vector
1776 PyArrayObject* b; // Matrix
1777 PyArrayObject* descb; // Matrix description vector
1778 char uplo;
1779 int info;
1780 int one = 1;
1781 if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
1782 return NULL;
1783
1784 int a_m = INTP(desca)[2];
1785 int a_n = INTP(desca)[3];
1786 // Only square matrices
1787 assert (a_m == a_n);
1788
1789 int b_m = INTP(descb)[2];
1790 int b_n = INTP(descb)[3];
1791 // Equation valid
1792 assert (a_n == b_m);
1793
1794 int n = a_n;
1795 int nrhs = b_n;
1796
1797 int* pivot = GPAW_MALLOC(int, a_m+2000); // TODO: How long should this exaclty be?
1798
1799 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1800 {
1801 assert(1==-1); // No double version implemented
1802 }
1803 else
1804 {
1805 pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
1806 (void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
1807 }
1808 free(pivot);
1809 PyObject* returnvalue = Py_BuildValue("i", info);
1810 return returnvalue;
1811 }
1812 */
1813
scalapack_solve(PyObject * self,PyObject * args)1814 PyObject* scalapack_solve(PyObject *self, PyObject *args) {
1815 // Solves equation Ax = B, where A is a general matrix
1816 PyArrayObject* a; // Matrix
1817 PyArrayObject* desca; // Matrix description vector
1818 PyArrayObject* b; // Matrix
1819 PyArrayObject* descb; // Matrix description vector
1820 int info;
1821 int one = 1;
1822 if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
1823 return NULL;
1824
1825 int a_ConTxt = INTP(desca)[1];
1826 int a_m = INTP(desca)[2];
1827 int a_n = INTP(desca)[3];
1828 int a_mb = INTP(desca)[4];
1829 // Only square matrices
1830 assert (a_m == a_n);
1831
1832 int b_m = INTP(descb)[2];
1833 int b_n = INTP(descb)[3];
1834 // Equation valid
1835 assert (a_n == b_m);
1836
1837 int n = a_n;
1838 int nrhs = b_n;
1839
1840 int nprow, npcol, myrow, mycol, locM;
1841
1842 Cblacs_gridinfo_(a_ConTxt, &nprow, &npcol, &myrow, &mycol);
1843 // LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
1844 locM = (((a_m/a_mb) + 1)/nprow + 1) * a_mb;
1845
1846 /*
1847 * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A )
1848 * This array contains the pivoting information.
1849 * IPIV(i) -> The global row local row i was swapped with.
1850 * This array is tied to the distributed matrix A.
1851
1852 * An upper bound for these quantities may be computed by:
1853 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
1854
1855 * M_A (global) DESCA( M_ ) The number of rows in the global
1856 * array A.
1857
1858 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
1859 * the rows of the array.
1860
1861 * NPROW (global input) INTEGER
1862 * NPROW specifies the number of process rows in the grid
1863 * to be created.
1864 */
1865
1866 int* pivot = GPAW_MALLOC(int, locM + a_mb);
1867
1868 //if (a->descr->type_num == PyArray_DOUBLE)
1869 if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
1870 {
1871 pdgesv_(&n, &nrhs,(double*)DOUBLEP(a), &one, &one, INTP(desca), pivot,
1872 (double*)DOUBLEP(b), &one, &one, INTP(descb), &info);
1873 }
1874 else
1875 {
1876 pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
1877 (void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
1878 }
1879 free(pivot);
1880 PyObject* returnvalue = Py_BuildValue("i", info);
1881 return returnvalue;
1882 }
1883
1884 #endif
1885 #endif // PARALLEL
1886