1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /*-*-mode: C; c-indentation-style: "bsd"; c-basic-offset: 4; -*-*/
31 
32 /** @file xc_matrix_sparse.cc
33 
34     @brief The sparse XC matrix evaluator.
35 
36     @author: Pawel Salek <em>responsible</em>
37 
38     (c) Pawel Salek, pawsa@theochem.kth.se.
39     2002.04.05
40 
    This module evaluates the DFT contribution to the KS matrix.
42 */
43 
44 #define WITH_PTHREAD 1
45 #if defined(WITH_PTHREAD)
46 #include <pthread.h>
47 static pthread_mutex_t dft_prop_mutex = PTHREAD_MUTEX_INITIALIZER;
48 static pthread_mutex_t dft_hicu_grid_init_mutex = PTHREAD_MUTEX_INITIALIZER;
49 #endif
50 
51 #include "aos.h"
52 #include "integrator.h"
53 #include "sparse_matrix.h"
54 #include "xc_matrix_sparse.h"
55 #include "dft_common.h"
56 #include "grid_reader.h"
57 #include "output.h"
58 #include "utilities.h"
59 #include "xc_evaluators.h"
60 
61 /* FIXME: remove this dependency */
62 #include "grid_hicu.h"
63 
64 /* restrict hints should not be necessary... */
65 #if !defined(restrict)
66 #define restrict
67 #endif
68 
69 BEGIN_NAMESPACE(Dft)
70 
71 
72 class XCEvaluator {
73 protected:
74     const BasisInfoStruct& bisOrig;
75     const IntegralInfo& integralInfo;
76     const Molecule& mol;
77     const Dft::GridParams& gss;
78     std::vector<int> const & permutationHML;
79     int* aoMap;
80     BasisInfoStruct* bisPermuted;
81     Dft::SparsePattern* pattern;
82 public:
83     XCEvaluator(const BasisInfoStruct& bisOrig_,
84                 const IntegralInfo& integralInfo_,
85                 const Molecule& mol_,
86                 const Dft::GridParams& gss_,
87                 std::vector<int> const & permutationHML_,
88                 const symmMatrix& dens);
89     ~XCEvaluator();
90 };
91 
XCEvaluator(const BasisInfoStruct & bisOrig_,const IntegralInfo & integralInfo_,const Molecule & mol_,const Dft::GridParams & gss_,std::vector<int> const & permutationHML_,const symmMatrix & dens)92 XCEvaluator::XCEvaluator(const BasisInfoStruct& bisOrig_,
93                          const IntegralInfo& integralInfo_,
94                          const Molecule& mol_,
95                          const Dft::GridParams& gss_,
96                          std::vector<int> const & permutationHML_,
97                          const symmMatrix& dens)
98     : bisOrig(bisOrig_), integralInfo(integralInfo_),
99       mol(mol_), gss(gss_), permutationHML(permutationHML_),
100       aoMap(new int[bisOrig.noOfBasisFuncs])
101 {
102     /* We need to create our own permutation of shells. The
103        permutation used in the matrix library permutes basis functions
104        even in the middle of the basis function shell, breaking in
105        this way simple mapping between shells and basis function
106        blocks. We therefore generate here own permutation of
107        shells. This is used to create own BasisInfoStruct used
108        throughout. The final conversion from own format to the "Common
109        Matrix Format" is done at the final assignment phase: the basis
110        function indices are permuted with the inverse
111        transformation. */
112 
113     int *shellMap = new int[bisOrig.noOfShells];
114 
115     Dft::setupShellMap(bisOrig, shellMap, aoMap);
116     bisPermuted = bisOrig.permuteShells(shellMap, integralInfo);
117     delete []shellMap;
118 
119 
120     /* Force grid creation so that the sparse pattern is
121        available... Maybe it is getSparsePattern's problem. */
122     ErgoMolInfo molInfo(*bisPermuted, mol);
123     pattern = new SparsePattern(*bisPermuted);
124 
125     /* When using HiCu grid generation we need a sparse density
126        matrix. In order to create a sparse density matrix we need a
127        SparsePattern. However, this is only needed when grid file does
128        not exist, and only for the HiCu case. */
129     SparseMatrix* dMatForGridGen = NULL;
130     /* Use mutex lock here to make sure only one thread does this
131        (there is anyway threading inside the HiCu code). */
132     pthread_mutex_lock(&dft_hicu_grid_init_mutex);
133     if(!grid_is_ready() && gss.gridType == Dft::GridParams::TYPE_HICU) {
134       Dft::SparsePattern patternTmp(*bisPermuted);
135       /* Generate sparse pattern. */
136       grid_generate_sparse_pattern(*bisPermuted, gss.hicuParams.maxError,
137 				   gss.hicuParams.box_size, gss.hicuParams.start_box_size_debug, patternTmp);
138       /* Create dMat already here so it can be used by HiCu grid generator! */
139       dMatForGridGen = new SparseMatrix(patternTmp, dens, aoMap, permutationHML);
140     }
141     else {
142       // We do not really need a sparse density matrix in this case; we just create a dummy.
143       Dft::SparsePattern patternTmp(*bisPermuted);
144       dMatForGridGen = new SparseMatrix(patternTmp);
145     }
146     pthread_mutex_unlock(&dft_hicu_grid_init_mutex);
147 
148     do_output(LOG_CAT_INFO, LOG_AREA_DFT, "getXC calling grid_open_full().");
149     Dft::Matrix *mat = createGridMatrix(*dMatForGridGen);
150     DftGridReader* rawgrid = grid_open_full(&molInfo, gss, pattern,
151                                             mat, *bisPermuted);
152     delete mat;
153     delete dMatForGridGen;
154 
155     grid_close(rawgrid);
156 }
157 
~XCEvaluator()158 XCEvaluator::~XCEvaluator()
159 {
160     delete bisPermuted;
161     delete pattern;
162     delete []aoMap;
163 }
164 
165 class XCEvaluatorRestricted : public XCEvaluator {
166   SparseMatrix *densityMatrix;
167 public:
XCEvaluatorRestricted(const BasisInfoStruct & bisOrig_,const IntegralInfo & integralInfo_,const Molecule & mol_,const Dft::GridParams & gss_,std::vector<int> const & permutationHML_,const symmMatrix & density)168   XCEvaluatorRestricted(const BasisInfoStruct& bisOrig_,
169                         const IntegralInfo& integralInfo_,
170                         const Molecule& mol_,
171                         const Dft::GridParams& gss_,
172                         std::vector<int> const & permutationHML_,
173                         const symmMatrix& density)
174     : XCEvaluator(bisOrig_, integralInfo_, mol_, gss_, permutationHML_,
175                   density),
176       densityMatrix(NULL)
177   {
178     densityMatrix = new SparseMatrix(*pattern, density, aoMap, permutationHML);
179   }
~XCEvaluatorRestricted()180   ~XCEvaluatorRestricted()
181   {
182     delete densityMatrix;
183   }
184 
185   real getXC(int nElectrons, symmMatrix& xcm, real* xcEnergy,
186              int nThreads) const;
187 };
188 
189 /** Computes Fock matrix xcm corresponding to given density matrix dmat.
190    fast version - uses memory bandwidth-efficient algorithm.
191 */
192 real
getXC(int nElectrons,symmMatrix & xcm,real * xcEnergy,int nThreads) const193 XCEvaluatorRestricted::getXC(int nElectrons, symmMatrix& xcm,
194                              real* xcEnergy, int nThreads) const
195 {
196     real electrons;
197     Util::TimeMeter tm;
198 
199     *xcEnergy = 0;
200     sync_threads(false, nThreads);
201     SparseMatrix excmat(*pattern);
202 
203     KsData<Dft::SparseMatrix> ds(&excmat, DFT_MAX_BLLEN);
204 
205     void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
206                   int bllen, int blstart, int blend,
207                   KsData<Dft::SparseMatrix>* data)
208       = xcCallbackLdaR<Dft::SparseMatrix,XCDistributorLda<Dft::SparseMatrix> >;
209     void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
210                   int bllen, int blstart, int blend,
211                   KsData<Dft::SparseMatrix>* data)
212       = xcCallbackGgaR<Dft::SparseMatrix,XCDistributorGga<Dft::SparseMatrix> >;
213 
214     electrons = integrate(1, &densityMatrix, *bisPermuted, mol, gss, nThreads,
215                           (DftBlockCallback)
216                           (selected_func->is_gga() ? cbgga : cblda),
217                           &ds);
218 
219     pthread_mutex_lock(&dft_prop_mutex);
220     *xcEnergy +=ds.energy;
221     excmat.addSymmetrizedTo(xcm, aoMap, permutationHML);
222     pthread_mutex_unlock(&dft_prop_mutex);
223 
224 
225     if(nThreads<=1) {
226         do_output(LOG_CAT_INFO, LOG_AREA_DFT,
227                   "Electrons: %11.7f %7.1g: xc energy %f (serial)",
228                   (double)electrons,
229                   (double)((electrons-nElectrons)/nElectrons),
230                   (double)ds.energy);
231         tm.print(LOG_AREA_DFT, __func__);
232     }
233     return electrons;
234 }
235 
/** Per-thread argument bundle handed to each xcWorker thread by
    getXC_mt. xcEnergy and el are per-thread outputs; xcm is shared
    between threads and updated under dft_prop_mutex inside getXC. */
struct XcData {
    const XCEvaluatorRestricted* xcEvaluator; /* shared evaluator (read-only) */
    int nElectrons;    /* expected electron count, for error reporting */
    symmMatrix* xcm;   /* shared result matrix */
    real xcEnergy;     /* output: this thread's XC energy contribution */
    real el;           /* output: this thread's integrated electron count */
    int nThreads;      /* total number of cooperating threads */
};
244 
245 static void*
xcWorker(void * data)246 xcWorker(void *data)
247 {
248   static const int XCWORKER_ERROR =0;
249   struct XcData *d = (XcData*)data;
250   try {
251       d->el = d->xcEvaluator->getXC(d->nElectrons, *d->xcm, &d->xcEnergy,
252                                     d->nThreads);
253   } catch(const char *s) {
254     do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
255               "xcWorker thread caught an exception '%s'", s);
256     return (void*)&XCWORKER_ERROR;
257   } catch(const std::bad_alloc & e) {
258     do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
259               "xcWorker thread caught an exception '%s'", e.what());
260     return (void*)&XCWORKER_ERROR;
261   } catch(const std::runtime_error & e) {
262     do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
263               "xcWorker thread caught an exception '%s'", e.what());
264     return (void*)&XCWORKER_ERROR;
265   }  catch(...) {
266     do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
267               "xcWorker thread caught unexpected exception.");
268     return (void*)&XCWORKER_ERROR;
269   }
270   return NULL;
271 }
272 
273 real
getXC_mt(const BasisInfoStruct & bis,const IntegralInfo & integralInfo,const Molecule & mol,const Dft::GridParams & gss,int nElectrons,const symmMatrix & dens,symmMatrix & xcm,real * xcEnergy,std::vector<int> const & permutationHML)274 getXC_mt(const BasisInfoStruct& bis, const IntegralInfo& integralInfo,
275          const Molecule& mol, const Dft::GridParams& gss, int nElectrons,
276          const symmMatrix& dens, symmMatrix& xcm, real* xcEnergy,
277          std::vector<int> const & permutationHML)
278 {
279     int i;
280     real electrons;
281     Util::TimeMeter tm;
282 
283     int nThreads = dft_get_num_threads();
284     std::vector<XcData> data(nThreads);
285     std::vector<pthread_t> pids(nThreads);
286 
287     XCEvaluatorRestricted xcEvaluator(bis, integralInfo, mol, gss,
288                                       permutationHML, dens);
289 
290     if(nThreads == 1) {
291         /* Do not create any threads at all to avoid stack allocation. */
292         *xcEnergy = 0.0;
293         electrons = xcEvaluator.getXC(nElectrons, xcm, xcEnergy, 1);
294     } else {
295         for(i=0; i<nThreads; i++) {
296             data[i].xcEvaluator = &xcEvaluator;
297             data[i].nElectrons = nElectrons;
298             data[i].xcm   = &xcm;
299             data[i].xcEnergy = 0.0;
300             data[i].nThreads = nThreads;
301             if(pthread_create(&pids[i], NULL, xcWorker, &data[i])) {
302               do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
303                         "Creation of thread # %d failed\n", i);
304               if (i==0)
305                 throw "No worker threads could be started";
306               else
307                 break;
308             }
309         }
310         *xcEnergy = 0;
311         electrons = 0;
312         while (--i >= 0) {
313             pthread_join(pids[i], NULL);
314             *xcEnergy += data[i].xcEnergy;
315             electrons += data[i].el;
316         }
317     }
318     if(nThreads>1) {
319         do_output(LOG_CAT_INFO, LOG_AREA_DFT,
320                   "Electrons: %11.7f %7.1g: KS Energy %f (mt)",
321                   (double)electrons,
322                   (double)((electrons-nElectrons)/nElectrons),
323                   (double)*xcEnergy);
324         tm.print(LOG_AREA_DFT, __func__);
325     }
326     return electrons;
327 }
328 
329 real
getXC_seq(const BasisInfoStruct & bis,const IntegralInfo & integralInfo,const Molecule & mol,const Dft::GridParams & gss,int nElectrons,const symmMatrix & dens,symmMatrix & xcm,real * xcEnergy,std::vector<int> const & permutationHML)330 getXC_seq(const BasisInfoStruct& bis, const IntegralInfo& integralInfo,
331           const Molecule& mol, const Dft::GridParams& gss, int nElectrons,
332           const symmMatrix& dens, symmMatrix& xcm, real* xcEnergy,
333           std::vector<int> const & permutationHML)
334 {
335   XCEvaluatorRestricted xcEvr(bis, integralInfo, mol, gss,
336                               permutationHML, dens);
337   printf("%p\n", (void*)(&xcEvr));
338   return xcEvr.getXC(nElectrons, xcm, xcEnergy, 1);
339 }
340 
341 /* =================================================================== */
342 /* Unrestricted sparse code. */
343 class XCEvaluatorUnrestricted : public XCEvaluator {
344   SparseMatrix *dMat[2];
345 public:
XCEvaluatorUnrestricted(const BasisInfoStruct & bisOrig_,const IntegralInfo & integralInfo_,const Molecule & mol_,const Dft::GridParams & gss_,std::vector<int> const & permutationHML_,const symmMatrix & densA,const symmMatrix & densB)346   XCEvaluatorUnrestricted(const BasisInfoStruct& bisOrig_,
347                           const IntegralInfo& integralInfo_,
348                           const Molecule& mol_,
349                           const Dft::GridParams& gss_,
350                           std::vector<int> const & permutationHML_,
351                           const symmMatrix& densA,
352                           const symmMatrix& densB)
353     : XCEvaluator(bisOrig_, integralInfo_, mol_, gss_, permutationHML_, densA)
354   {
355     dMat[0] = new SparseMatrix(*pattern, densA, aoMap, permutationHML);
356     dMat[1] = new SparseMatrix(*pattern, densB, aoMap, permutationHML);
357   }
~XCEvaluatorUnrestricted()358   ~XCEvaluatorUnrestricted()
359   {
360     delete dMat[0];
361     delete dMat[1];
362   }
363 
364   real getXC(int nElectrons, symmMatrix& xcA, symmMatrix& xcB,
365              real* xcEnergy, int nThreads) const;
366 };
367 
368 real
getXC(int nElectrons,symmMatrix & xca,symmMatrix & xcb,real * xcEnergy,int nThreads) const369 XCEvaluatorUnrestricted::getXC(int nElectrons,
370                                symmMatrix& xca, symmMatrix& xcb,
371                                real* xcEnergy, int nThreads) const
372 {
373     real electrons;
374 
375     Util::TimeMeter tm;
376     bool isGGA = selected_func->is_gga();
377 
378     *xcEnergy = 0.0;
379     sync_threads(false, nThreads);
380     SparseMatrix mata(*pattern), matb(*pattern);
381     struct UksData<Dft::SparseMatrix> ds(&mata, &matb, DFT_MAX_BLLEN);
382 
383     void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
384                   int bllen, int blstart, int blend,
385                   UksData<SparseMatrix>* data)
386       = xcCallbackLdaU<SparseMatrix,XCDistributorLda<SparseMatrix> >;
387     void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
388                   int bllen, int blstart, int blend,
389                   UksData<SparseMatrix>* data)
390       = xcCallbackGgaU<SparseMatrix,XCDistributorGgaU<SparseMatrix> >;
391 
392     electrons = integrate(2, dMat, *bisPermuted, mol, gss,
393                           nThreads,
394                           DftBlockCallback(isGGA ? cbgga : cblda),
395                           &ds);
396 
397     pthread_mutex_lock(&dft_prop_mutex);
398     *xcEnergy +=ds.energy;
399     pthread_mutex_unlock(&dft_prop_mutex);
400     mata.addSymmetrizedTo(xca, aoMap, permutationHML);
401     matb.addSymmetrizedTo(xcb, aoMap, permutationHML);
402     pthread_mutex_unlock(&dft_prop_mutex);
403 
404     if(nThreads <= 1) {
405       do_output(LOG_CAT_INFO, LOG_AREA_DFT,
406                 "Electrons: %11.7f %7.1g:  U-xc energy %f (serial)",
407                 (double)electrons,
408                 (double)((electrons-nElectrons)/nElectrons),
409                 (double)ds.energy);
410       tm.print(LOG_AREA_DFT, __func__);
411     }
412     return electrons;
413 }
414 
415 /* multithreaded interface TBW... */
416 
417 real
getUXC_seq(const BasisInfoStruct & bis,const IntegralInfo & integralInfo,const Molecule & mol,const Dft::GridParams & gss,int nElectrons,const symmMatrix & densA,const symmMatrix & densB,symmMatrix & xcA,symmMatrix & xcB,real * xcEnergy,std::vector<int> const & permutationHML)418 getUXC_seq(const BasisInfoStruct& bis, const IntegralInfo& integralInfo,
419           const Molecule& mol, const Dft::GridParams& gss, int nElectrons,
420           const symmMatrix& densA, const symmMatrix& densB,
421           symmMatrix& xcA, symmMatrix& xcB, real* xcEnergy,
422           std::vector<int> const & permutationHML)
423 {
424     XCEvaluatorUnrestricted xcEvaluator(bis, integralInfo, mol, gss,
425                                         permutationHML, densA, densB);
426     return xcEvaluator.getXC(nElectrons, xcA, xcB, xcEnergy, 1);
427 }
428 
429 
430 END_NAMESPACE(Dft)
431