1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /*-*-mode: C; c-indentation-style: "bsd"; c-basic-offset: 4; -*-*/
31 /** @file xc_matrix.cc The XC matrix evaluator.
32    (c) Pawel Salek, pawsa@theochem.kth.se.
33    2002.04.05
34 
35    This module evaluates DFT contribution KS matrix.
36 */
37 
38 #ifdef HAVE_CONFIG_H
39 #include "config.h"
40 #endif
41 
42 #include <assert.h>
43 #include <cmath>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <time.h>
47 #include <sys/times.h>
48 #include <unistd.h>
49 
50 #define WITH_PTHREAD 1
51 #if defined(WITH_PTHREAD)
52 #include <pthread.h>
53 static pthread_mutex_t dft_prop_mutex = PTHREAD_MUTEX_INITIALIZER;
54 #endif
55 
56 #define __CVERSION__
57 #include "aos.h"
58 #include "integrator.h"
59 #include "functionals.h"
60 #include "dft_common.h"
61 
62 #include "mat_gblas.h"
63 #include "output.h"
64 #include "utilities.h"
65 #include "matrix_utilities.h"
66 #include "grid_matrix.h"
67 #include "xc_evaluators.h"
68 
69 /* restrict hints should not be necessary... */
70 #if !defined(restrict)
71 #define restrict
72 #endif
73 
74 const static int KOHNSH_DEBUG = 0;
75 
76 void lrao2mo_(const real* cmo, const int *ksymop,
77               const real*res, real* fmat, real* work, int*lw);
78 
79 #if defined(VAR_MPI)
80 #include <mpi.h>
81 #define MASTER_NO 0
82 #endif
83 #if 0 && defined(VAR_MPI)
84 #include <mpi.h>
85 #define MASTER_NO 0
86 
87 /* dft_kohn_sham_slave:
88    this is a slave driver. It's task is to allocate memory needed by
89    the main property evaluator (dft_kohn_sham in this case) and call it.
90 */
91 void
92 dft_kohn_sham_slave(real* work, int* lwork, const int* iprint)
93 {
94     real* dmat = malloc(inforb_.n2basx*sizeof(real));
95     real* ksm  = calloc(inforb_.n2basx,sizeof(real));
96     int iprfck = 0;
97     dft_kohn_sham_(dmat, ksm, work, lwork, &iprfck);
98     free(dmat);
99     free(ksm);
100 }
101 
102 static __inline__ void
103 dft_kohn_sham_sync_slaves(real* dmat)
104 {
105     MPI_Bcast(dmat, inforb_.n2basx,MPI_DOUBLE,
106 	      MASTER_NO, MPI_COMM_WORLD);
107 }
108 
109 static __inline__ void
110 dft_kohn_sham_collect_info(real*ksm, real* energy, real* work)
111 {
112     real tmp = *energy;
113     dcopy_(&inforb_.n2basx, ksm,&ONEI, work, &ONEI);
114     MPI_Reduce(work, ksm, inforb_.n2basx, MPI_DOUBLE, MPI_SUM,
115 	       MASTER_NO, MPI_COMM_WORLD);
116     MPI_Reduce(&tmp, energy, 1, MPI_DOUBLE, MPI_SUM,
117 	       MASTER_NO, MPI_COMM_WORLD);
118 }
119 
120 #else /* VAR_MPI */
121 #define dft_kohn_sham_sync_slaves(dmat)
122 #define dft_kohn_sham_collect_info(myksm, ksm, energy)
123 #endif /* VAR_MPI */
124 
125 /* =================================================================== */
126 /*                    BLOCKED PROPERTY EVALUATORS                      */
127 /* =================================================================== */
128 
129 struct XCDistributorLdaBlas {
130   static void distribute(DftIntegratorBl *grid,
131                          int bllen, int blstart, int blend,
132                          real * restrict tmp, const real *restrict dR,
133                          Dft::FullMatrix& excmat);
134 };
135 
136 void
distribute(DftIntegratorBl * grid,int bllen,int blstart,int blend,real * restrict tmp,const real * restrict dR,Dft::FullMatrix & mat)137 XCDistributorLdaBlas::distribute(DftIntegratorBl *grid,
138                                  int bllen, int blstart, int blend,
139                                  real * restrict tmp, const real *restrict dR,
140                                  Dft::FullMatrix& mat)
141 {
142     int isym, jbl, j, ibl, k;
143     const real * const aos = grid->atv;
144     real * restrict excmat = mat.mat;
145 
146     for(isym=0; isym<grid->nsym; isym++) {
147         int (*restrict blocks)[2] = BASBLOCK(grid,isym);
148         int bl_cnt = grid->bas_bl_cnt[isym];
149 
150         for(jbl=0; jbl<bl_cnt; jbl++)
151             for(j=blocks[jbl][0]; j<blocks[jbl][1]; j++) {
152                 int joff = j*bllen;
153                 for(k=blstart; k<blend; k++)
154                     tmp[k+joff] = aos[k+joff]*dR[k];
155             }
156 
157         for(jbl=0; jbl<bl_cnt; jbl++) {
158 	    static const ergo_real HALF = 0.5;
159 	    static const ergo_real QUARTER = 0.25;
160 	    for(ibl=0; ibl<jbl; ibl++) {
161 		int cRows = blocks[ibl][1]-blocks[ibl][0];
162 		int cCols = blocks[jbl][1]-blocks[jbl][0];
163 		static const ergo_real ONER = 1.0;
164 		mat::gemm("T","N", &cRows, &cCols, &bllen, &HALF,
165 			  aos+blocks[ibl][0]*bllen, &bllen,
166 			  tmp+blocks[jbl][0]*bllen, &bllen, &ONER,
167 			  excmat+blocks[jbl][0]*grid->nbast+blocks[ibl][0],
168 			  &grid->nbast);
169 	    }
170 	    /* This will double-count diagonal elements, need to
171 	       correct for it later. Or maybe not? */
172 	    int cRows = blocks[jbl][1]-blocks[jbl][0];
173 	    int cCols = blocks[jbl][1]-blocks[jbl][0];
174 	    mat::gemm("T","N", &cRows, &cCols, &bllen, &QUARTER,
175 		      aos+blocks[jbl][0]*bllen, &bllen,
176 		      tmp+blocks[jbl][0]*bllen, &bllen, &ONER,
177 		      excmat+blocks[jbl][0]*grid->nbast+blocks[jbl][0],
178 		      &grid->nbast);
179 	}
180     }
181 }
182 
183 struct XCDistributorGgaBlas {
184   static void distribute(DftIntegratorBl *grid,
185                          int bllen, int blstart, int blend,
186                          real * restrict tmp,
187                          const real *dR, const real *dZ,
188                          Dft::FullMatrix& mat);
189 };
190 
191 void
distribute(DftIntegratorBl * grid,int bllen,int blstart,int blend,real * restrict tmp,const real * dR,const real * dZ,Dft::FullMatrix & mat)192 XCDistributorGgaBlas::distribute(DftIntegratorBl *grid,
193                                  int bllen, int blstart, int blend,
194                                  real * restrict tmp,
195                                  const real * dR, const real * dZ,
196                                  Dft::FullMatrix& mat)
197 {
198     int isym, jbl, j, ibl, k;
199     const real * restrict aox = grid->atv+bllen*grid->nbast;
200     const real * restrict aoy = grid->atv+bllen*grid->nbast*2;
201     const real * restrict aoz = grid->atv+bllen*grid->nbast*3;
202     const real * restrict aos = grid->atv;
203     real * restrict excmat = mat.mat;
204 
205     for(isym=0; isym<grid->nsym; isym++) {
206         int (*restrict blocks)[2] = BASBLOCK(grid,isym);
207         int nblocks = grid->bas_bl_cnt[isym];
208         for(jbl=0; jbl<nblocks; jbl++)
209             for(j=blocks[jbl][0]; j<blocks[jbl][1]; j++) {
210                 int joff = j*bllen;
211                 for(k=0; k<bllen; k++)
212                     tmp[k+joff] =
213                         dR[k]* aos[k+j*bllen] +
214                         dZ[k]*(aox[k+j*bllen]*grid->g.rad.a[k][0]+
215                                aoy[k+j*bllen]*grid->g.rad.a[k][1]+
216                                aoz[k+j*bllen]*grid->g.rad.a[k][2]);
217         }
218 
219         for(jbl=0; jbl<nblocks; jbl++) {
220 	    for(ibl=0; ibl<nblocks; ibl++) {
221 		int cRows = blocks[ibl][1]-blocks[ibl][0];
222 		int cCols = blocks[jbl][1]-blocks[jbl][0];
223 		static const ergo_real ONER = 1.0;
224 		mat::gemm("T","N", &cRows, &cCols, &bllen, &ONER,
225 			  aos+blocks[ibl][0]*bllen, &bllen,
226 			  tmp+blocks[jbl][0]*bllen, &bllen, &ONER,
227 			  excmat+blocks[jbl][0]*grid->nbast+blocks[ibl][0],
228 			  &grid->nbast);
229 	    }
230 	}
231     }
232 }
233 
234 
235 /* =================================================================== */
236 /*                 blocked density and KS evaluation                   */
237 /* =================================================================== */
238 
239 
240 #if 0
241 static void
242 printmat(int n, const ergo_real *m, const char *name)
243 {
244   int i, j;
245   printf("Printing matrix %s\n", name);
246   for(i=0; i<n; i++) {
247     for(j=0; j<n; j++)
248       printf("%10.5f", (double)m[i + j*n]);
249     puts("");
250   }
251 }
252 #endif
253 
254 
255 /** computes Fock matrix ksm corresponding to given density matrix
256    dmat.  fast version - uses memory bandwidth-efficient algorithm.
257 */
258 EXTERN_C real
dft_get_xc(int nElectrons,const real * dmat,const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,real * ksm,real * edfty,int nThreads)259 dft_get_xc(int nElectrons, const real* dmat, const BasisInfoStruct& bis,
260            const Molecule& mol, const Dft::GridParams& gss,
261 	   real* ksm, real* edfty, int nThreads)
262 {
263     int nbast2, i, j;
264     real electrons;
265     int nbast = bis.noOfBasisFuncs;
266     Util::TimeMeter tm;
267     bool isGGA = selected_func->is_gga();
268     Dft::FullMatrix res(nbast);
269     Dft::FullMatrix density(dmat, nbast);
270     KsData<Dft::FullMatrix> ds(&res, DFT_MAX_BLLEN);
271     const Dft::FullMatrix *densPtr = &density;
272     nbast2   = nbast*nbast;
273 
274 #if USE_BLAS_IN_XC == 1
275     void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
276                   int bllen, int blstart, int blend,
277                   KsData<Dft::FullMatrix>* data) =
278       &xcCallbackLdaR<Dft::FullMatrix,XCDistributorLdaBlas>;
279     void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
280                   int bllen, int blstart, int blend,
281                   KsData<Dft::FullMatrix>* data) =
282       &xcCallbackGgaR<Dft::FullMatrix,XCDistributorGgaBlas>;
283 #else
284     void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
285               int bllen, int blstart, int blend,
286                   KsData<Dft::FullMatrix>* data) =
287       &xcCallbackLdaR<Dft::FullMatrix,XCDistributorLda<Dft::FullMatrix> >;
288     void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
289                   int bllen, int blstart, int blend,
290                   KsData<Dft::FullMatrix>* data) =
291       &xcCallbackGgaR<Dft::FullMatrix,XCDistributorGga<Dft::FullMatrix> >;
292 #endif
293     electrons = Dft::integrate(1, &densPtr, bis, mol, gss, nThreads,
294                                (DftBlockCallback)
295                                (isGGA ? cbgga : cblda),
296                                &ds);
297 
298     for(i=0; i<nbast; i++) {
299 	int ioff = i*nbast;
300 	for(j=0; j<i; j++) {
301 	    int joff = j*nbast;
302 	    real averag = (res.mat[i+joff] + res.mat[j+ioff]);
303 	    res.mat[i+joff] = res.mat[j+ioff] = averag;
304 	}
305 #if (USE_BLAS_IN_XC  == 1)
306         res.mat[i+i*nbast] *= 2.0;
307 #endif
308     }
309 
310     pthread_mutex_lock(&dft_prop_mutex);
311     *edfty +=ds.energy;
312     mat::axpy(&nbast2, &ONER, res.mat, &ONEI, ksm, &ONEI);
313     pthread_mutex_unlock(&dft_prop_mutex);
314 
315     if(nThreads<=1) {
316         if(KOHNSH_DEBUG) {
317             output_matrix(nbast, res.mat, "kohn sham matrix");
318         }
319         int nElectrons = mol.getNumberOfElectrons();
320         do_output(LOG_CAT_INFO, LOG_AREA_DFT,
321 		  "Electrons: %11.7f %7.1g: xc energy %f (serial)",
322                   (double)electrons,
323                   (double)((electrons-nElectrons)/nElectrons),
324                   (double)ds.energy);
325 	tm.print(LOG_AREA_DFT, __func__);
326     }
327 
328     return electrons;
329 }
330 
331 /* multithreaded interface... */
332 struct xc_data {
333     const real *dmat;
334     const BasisInfoStruct *bis;
335     const Molecule *mol;
336     const Dft::GridParams *gss;
337     real *xc, edfty;
338     real el;
339     int nElectrons;
340     int nThreads;
341 };
342 static void*
dft_get_xc_worker(void * data)343 dft_get_xc_worker(void *data)
344 {
345   static const int XCWORKER_ERROR = 3;
346     struct xc_data *d = (struct xc_data*)data;
347     try {
348       d->el = dft_get_xc(d->nElectrons, d->dmat, *d->bis, *d->mol, *d->gss,
349 			 d->xc, &d->edfty, d->nThreads);
350     } catch(const char *s) {
351       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
352 		"dft_get_xc_worker thread caught an exception '%s'", s);
353       return (void*)&XCWORKER_ERROR;
354     } catch(const std::bad_alloc & e) {
355       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
356 		"dft_get_xc_worker thread caught an exception '%s'", e.what());
357       return (void*)&XCWORKER_ERROR;
358     } catch(const std::runtime_error & e) {
359       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
360 		"dft_get_xc_worker thread caught an exception '%s'", e.what());
361       return (void*)&XCWORKER_ERROR;
362     }  catch(...) {
363       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
364 		"dft_get_xc_worker thread caught unexpected exception.");
365       return (void*)&XCWORKER_ERROR;
366     }
367 
368     return NULL;
369 }
370 
371 /** Computes the XC interaction matrix for given density matrix @param dmat .
372     @returns the integrated number of electrons.
373     @param nElectrons number of electrons.
374     @param bis a structure describing the used basis set.
375     @param mol a structure describing the molecule.
376     @param gss a structure describing the grid settings.
377     @param xc resulting XC matrix.
378     @param edfty resulting XC energy.
379 */
380 EXTERN_C real
dft_get_xc_mt(int nElectrons,const real * dmat,const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,real * xc,real * edfty)381 dft_get_xc_mt(int nElectrons, const real* dmat, const BasisInfoStruct& bis,
382               const Molecule& mol, const Dft::GridParams& gss,
383 	      real *xc, real* edfty)
384 {
385 
386     int i, threads;
387     real electrons;
388     Util::TimeMeter tm;
389 
390     threads = dft_get_num_threads();
391     std::vector<xc_data> data(threads);
392     std::vector<pthread_t> pids(threads);
393     if(threads == 1) {
394 	/* Do not create any threads at all to avoid stack allocation. */
395         *edfty = 0.0;
396 	electrons = dft_get_xc(nElectrons, dmat, bis, mol, gss, xc, edfty, 1);
397     } else {
398 	for(i=0; i<threads; i++) {
399 	    data[i].nElectrons = nElectrons;
400 	    data[i].dmat = dmat;
401 	    data[i].xc   = xc;
402 	    data[i].edfty = 0.0;
403 	    data[i].bis = &bis;
404 	    data[i].mol = &mol;
405 	    data[i].gss = &gss;
406             data[i].nThreads = threads;
407 	    if (pthread_create(&pids[i], NULL, dft_get_xc_worker, &data[i])) {
408 	      do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
409 			"Creation of thread # %d failed\n", i);
410 	      if (i==0)
411 		throw "No worker threads could be started";
412 	      else
413 		break;
414 	    }
415 	}
416 	*edfty = 0;
417 	electrons = 0;
418 	while ( --i >= 0) {
419 	    pthread_join(pids[i], NULL);
420 	    *edfty += data[i].edfty;
421 	    electrons += data[i].el;
422 	}
423 
424         int nElectrons = mol.getNumberOfElectrons();
425         do_output(LOG_CAT_INFO, LOG_AREA_DFT,
426                   "Electrons: %11.7f %7.1g: xc energy %f (mt)",
427                   (double)electrons,
428                   (double)((electrons-nElectrons)/nElectrons),
429                   (double)*edfty);
430         tm.print(LOG_AREA_DFT, __func__);
431     }
432     return electrons;
433 }
434 
435 /* ===================================================================
436    Blocked, unrestricted code
437    =================================================================== */
438 struct uks_data {
439   Dft::FullMatrix *exca, *excb;
440   real* dRa, *dRb;
441   real* dZa, *dZb, *dZab;
442   real energy;
443 };
444 
445 EXTERN_C real
dft_get_uxc(int nElectrons,const real * dmata,const real * dmatb,const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,real * xca,real * xcb,real * edfty,int nThreads)446 dft_get_uxc(int nElectrons, const real* dmata, const real *dmatb,
447             const BasisInfoStruct& bis, const Molecule& mol,
448              const Dft::GridParams& gss,
449 	    real* xca, real *xcb, real* edfty, int nThreads)
450 {
451     int nbast = bis.noOfBasisFuncs;
452     int nbast2, i, j, imat;
453     real electrons;
454     const Dft::FullMatrix *dmat[2];
455     Util::TimeMeter tm;
456     bool isGGA = selected_func->is_gga();
457     Dft::FullMatrix mata(nbast), matb(nbast);
458     Dft::FullMatrix densa(dmata, nbast);
459     Dft::FullMatrix densb(dmatb, nbast);
460     dmat[0] = &densa; dmat[1] = &densb;
461     nbast2   = nbast*nbast;
462     UksData<Dft::FullMatrix> ds(&mata, &matb, DFT_MAX_BLLEN);
463 
464     void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
465                   int bllen, int blstart, int blend,
466                   UksData<Dft::FullMatrix>* data)
467 #if USE_BLAS_IN_XC == 1
468       = xcCallbackLdaU<Dft::FullMatrix,XCDistributorLdaBlas >;
469 #else
470       = xcCallbackLdaU<Dft::FullMatrix,XCDistributorLda<Dft::FullMatrix> >;
471 #endif
472     void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
473                   int bllen, int blstart, int blend,
474                   UksData<Dft::FullMatrix>* data)
475       = xcCallbackGgaU<Dft::FullMatrix,XCDistributorGgaU<Dft::FullMatrix> >;
476 
477     electrons = Dft::integrate(2, dmat, bis, mol, gss, nThreads,
478                                (DftBlockCallback)
479                                (isGGA ? cbgga : cblda),
480                                &ds);
481 
482     for(imat=0; imat<2; imat++) {
483         real * e = imat ? mata.mat : matb.mat;
484         for(i=0; i<nbast; i++) {
485             int ioff = i*nbast;
486             for(j=0; j<i; j++) {
487                 int joff = j*nbast;
488                 real averag = (e[i+joff] + e[j+ioff]);
489                 e[i+joff] = e[j+ioff] = averag;
490             }
491 #if (USE_BLAS_IN_XC  == 1)
492             if (!isGGA) e[i+i*nbast] *= 2.0;
493 #endif
494         }
495     }
496 
497     pthread_mutex_lock(&dft_prop_mutex);
498     *edfty=ds.energy;
499     mat::axpy(&nbast2, &ONER, mata.mat, &ONEI, xca, &ONEI);
500     mat::axpy(&nbast2, &ONER, matb.mat, &ONEI, xcb, &ONEI);
501     pthread_mutex_unlock(&dft_prop_mutex);
502     if(KOHNSH_DEBUG) {
503         output_matrix(nbast, mata.mat, "Unrestricted xc_alpha matrix");
504         output_matrix(nbast, matb.mat, "Unrestricted xc_alpha matrix");
505     }
506 
507     if(nThreads <= 1) {
508 	do_output(LOG_CAT_INFO, LOG_AREA_DFT,
509 		  "Electrons: %11.7f %7.1g:  U-xc energy %f (serial)",
510 		  (double)electrons,
511                   (double)((electrons-nElectrons)/nElectrons),
512 		  (double)ds.energy);
513 	tm.print(LOG_AREA_DFT, __func__);
514     }
515     return electrons;
516 }
517 
518 /* multithreaded interface... */
519 struct uxc_data {
520     const real* dmata, *dmatb;
521     const BasisInfoStruct *bis;
522     const Molecule *mol;
523     const Dft::GridParams *gss;
524     real* xca,   *xcb;
525     real edfty, el;
526     int nElectrons;
527     int nThreads;
528 };
529 static void*
dft_get_uxc_worker(void * data)530 dft_get_uxc_worker(void *data)
531 {
532     struct uxc_data *d = (struct uxc_data*)data;
533     d->el = dft_get_uxc(d->nElectrons, d->dmata, d->dmatb, *d->bis, *d->mol,
534 			*d->gss, d->xca, d->xcb, &d->edfty, d->nThreads);
535     return NULL;
536 }
537 
538 EXTERN_C real
dft_get_uxc_mt(int nElectrons,const real * dmata,const real * dmatb,const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,real * xca,real * xcb,real * edfty)539 dft_get_uxc_mt(int nElectrons, const real* dmata, const real *dmatb,
540                const BasisInfoStruct& bis, const Molecule& mol,
541 	       const Dft::GridParams& gss,
542                real* xca,   real *xcb, real* edfty)
543 {
544 
545     int i, threads;
546     real electrons = 0;
547 
548     Util::TimeMeter tm;
549 
550     threads = dft_get_num_threads();
551     std::vector<uxc_data> data(threads);
552     std::vector<pthread_t> pids(threads);
553 
554     *edfty = 0.0;
555     if(threads == 1) {
556 	/* Do not create any threads at all to avoid stack allocation. */
557       electrons = dft_get_uxc(nElectrons, dmata, dmatb, bis, mol,
558                               gss, xca, xcb, edfty, threads);
559     } else {
560       for(i=0; i<threads; i++) {
561         data[i].nElectrons = nElectrons;
562         data[i].dmata = dmata;
563         data[i].dmatb = dmatb;
564         data[i].xca = xca;
565         data[i].xcb = xcb;
566         data[i].edfty = 0.0;
567         data[i].bis = &bis;
568         data[i].mol = &mol;
569 	data[i].gss = &gss;
570         data[i].nThreads = threads;
571 	if (pthread_create(&pids[i], NULL, dft_get_uxc_worker, &data[i])) {
572           do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
573                     "Creation of thread # %d failed\n", i);
574 	  if (i==0)
575 	    throw "No worker threads could be started";
576 	  else
577 	    break;
578 	}
579       }
580       while ( --i >= 0) {
581         pthread_join(pids[i], NULL);
582         *edfty += data[i].edfty;
583         electrons += data[i].el;
584       }
585       int nElectrons = mol.getNumberOfElectrons();
586       do_output(LOG_CAT_INFO, LOG_AREA_DFT,
587                 "Electrons: %11.7f %7.1g: u-xc energy %f (mt)",
588                 (double)electrons,
589                 (double)((electrons-nElectrons)/nElectrons),
590                 (double)*edfty);
591       tm.print(LOG_AREA_DFT, __func__);
592     }
593 
594     return electrons;
595 }
596