1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /*-*-mode: C; c-indentation-style: "bsd"; c-basic-offset: 4; -*-*/
31
32 /** @file xc_matrix_sparse.cc
33
34 @brief The sparse XC matrix evaluator.
35
36 @author: Pawel Salek <em>responsible</em>
37
38 (c) Pawel Salek, pawsa@theochem.kth.se.
39 2002.04.05
40
41 This module evaluates DFT contribution KS matrix.
42 */
43
44 #define WITH_PTHREAD 1
45 #if defined(WITH_PTHREAD)
46 #include <pthread.h>
47 static pthread_mutex_t dft_prop_mutex = PTHREAD_MUTEX_INITIALIZER;
48 static pthread_mutex_t dft_hicu_grid_init_mutex = PTHREAD_MUTEX_INITIALIZER;
49 #endif
50
51 #include "aos.h"
52 #include "integrator.h"
53 #include "sparse_matrix.h"
54 #include "xc_matrix_sparse.h"
55 #include "dft_common.h"
56 #include "grid_reader.h"
57 #include "output.h"
58 #include "utilities.h"
59 #include "xc_evaluators.h"
60
61 /* FIXME: remove this dependency */
62 #include "grid_hicu.h"
63
64 /* restrict hints should not be necessary... */
65 #if !defined(restrict)
66 #define restrict
67 #endif
68
69 BEGIN_NAMESPACE(Dft)
70
71
72 class XCEvaluator {
73 protected:
74 const BasisInfoStruct& bisOrig;
75 const IntegralInfo& integralInfo;
76 const Molecule& mol;
77 const Dft::GridParams& gss;
78 std::vector<int> const & permutationHML;
79 int* aoMap;
80 BasisInfoStruct* bisPermuted;
81 Dft::SparsePattern* pattern;
82 public:
83 XCEvaluator(const BasisInfoStruct& bisOrig_,
84 const IntegralInfo& integralInfo_,
85 const Molecule& mol_,
86 const Dft::GridParams& gss_,
87 std::vector<int> const & permutationHML_,
88 const symmMatrix& dens);
89 ~XCEvaluator();
90 };
91
XCEvaluator(const BasisInfoStruct & bisOrig_,const IntegralInfo & integralInfo_,const Molecule & mol_,const Dft::GridParams & gss_,std::vector<int> const & permutationHML_,const symmMatrix & dens)92 XCEvaluator::XCEvaluator(const BasisInfoStruct& bisOrig_,
93 const IntegralInfo& integralInfo_,
94 const Molecule& mol_,
95 const Dft::GridParams& gss_,
96 std::vector<int> const & permutationHML_,
97 const symmMatrix& dens)
98 : bisOrig(bisOrig_), integralInfo(integralInfo_),
99 mol(mol_), gss(gss_), permutationHML(permutationHML_),
100 aoMap(new int[bisOrig.noOfBasisFuncs])
101 {
102 /* We need to create our own permutation of shells. The
103 permutation used in the matrix library permutes basis functions
104 even in the middle of the basis function shell, breaking in
105 this way simple mapping between shells and basis function
106 blocks. We therefore generate here own permutation of
107 shells. This is used to create own BasisInfoStruct used
108 throughout. The final conversion from own format to the "Common
109 Matrix Format" is done at the final assignment phase: the basis
110 function indices are permuted with the inverse
111 transformation. */
112
113 int *shellMap = new int[bisOrig.noOfShells];
114
115 Dft::setupShellMap(bisOrig, shellMap, aoMap);
116 bisPermuted = bisOrig.permuteShells(shellMap, integralInfo);
117 delete []shellMap;
118
119
120 /* Force grid creation so that the sparse pattern is
121 available... Maybe it is getSparsePattern's problem. */
122 ErgoMolInfo molInfo(*bisPermuted, mol);
123 pattern = new SparsePattern(*bisPermuted);
124
125 /* When using HiCu grid generation we need a sparse density
126 matrix. In order to create a sparse density matrix we need a
127 SparsePattern. However, this is only needed when grid file does
128 not exist, and only for the HiCu case. */
129 SparseMatrix* dMatForGridGen = NULL;
130 /* Use mutex lock here to make sure only one thread does this
131 (there is anyway threading inside the HiCu code). */
132 pthread_mutex_lock(&dft_hicu_grid_init_mutex);
133 if(!grid_is_ready() && gss.gridType == Dft::GridParams::TYPE_HICU) {
134 Dft::SparsePattern patternTmp(*bisPermuted);
135 /* Generate sparse pattern. */
136 grid_generate_sparse_pattern(*bisPermuted, gss.hicuParams.maxError,
137 gss.hicuParams.box_size, gss.hicuParams.start_box_size_debug, patternTmp);
138 /* Create dMat already here so it can be used by HiCu grid generator! */
139 dMatForGridGen = new SparseMatrix(patternTmp, dens, aoMap, permutationHML);
140 }
141 else {
142 // We do not really need a sparse density matrix in this case; we just create a dummy.
143 Dft::SparsePattern patternTmp(*bisPermuted);
144 dMatForGridGen = new SparseMatrix(patternTmp);
145 }
146 pthread_mutex_unlock(&dft_hicu_grid_init_mutex);
147
148 do_output(LOG_CAT_INFO, LOG_AREA_DFT, "getXC calling grid_open_full().");
149 Dft::Matrix *mat = createGridMatrix(*dMatForGridGen);
150 DftGridReader* rawgrid = grid_open_full(&molInfo, gss, pattern,
151 mat, *bisPermuted);
152 delete mat;
153 delete dMatForGridGen;
154
155 grid_close(rawgrid);
156 }
157
~XCEvaluator()158 XCEvaluator::~XCEvaluator()
159 {
160 delete bisPermuted;
161 delete pattern;
162 delete []aoMap;
163 }
164
165 class XCEvaluatorRestricted : public XCEvaluator {
166 SparseMatrix *densityMatrix;
167 public:
XCEvaluatorRestricted(const BasisInfoStruct & bisOrig_,const IntegralInfo & integralInfo_,const Molecule & mol_,const Dft::GridParams & gss_,std::vector<int> const & permutationHML_,const symmMatrix & density)168 XCEvaluatorRestricted(const BasisInfoStruct& bisOrig_,
169 const IntegralInfo& integralInfo_,
170 const Molecule& mol_,
171 const Dft::GridParams& gss_,
172 std::vector<int> const & permutationHML_,
173 const symmMatrix& density)
174 : XCEvaluator(bisOrig_, integralInfo_, mol_, gss_, permutationHML_,
175 density),
176 densityMatrix(NULL)
177 {
178 densityMatrix = new SparseMatrix(*pattern, density, aoMap, permutationHML);
179 }
~XCEvaluatorRestricted()180 ~XCEvaluatorRestricted()
181 {
182 delete densityMatrix;
183 }
184
185 real getXC(int nElectrons, symmMatrix& xcm, real* xcEnergy,
186 int nThreads) const;
187 };
188
189 /** Computes Fock matrix xcm corresponding to given density matrix dmat.
190 fast version - uses memory bandwidth-efficient algorithm.
191 */
192 real
getXC(int nElectrons,symmMatrix & xcm,real * xcEnergy,int nThreads) const193 XCEvaluatorRestricted::getXC(int nElectrons, symmMatrix& xcm,
194 real* xcEnergy, int nThreads) const
195 {
196 real electrons;
197 Util::TimeMeter tm;
198
199 *xcEnergy = 0;
200 sync_threads(false, nThreads);
201 SparseMatrix excmat(*pattern);
202
203 KsData<Dft::SparseMatrix> ds(&excmat, DFT_MAX_BLLEN);
204
205 void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
206 int bllen, int blstart, int blend,
207 KsData<Dft::SparseMatrix>* data)
208 = xcCallbackLdaR<Dft::SparseMatrix,XCDistributorLda<Dft::SparseMatrix> >;
209 void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
210 int bllen, int blstart, int blend,
211 KsData<Dft::SparseMatrix>* data)
212 = xcCallbackGgaR<Dft::SparseMatrix,XCDistributorGga<Dft::SparseMatrix> >;
213
214 electrons = integrate(1, &densityMatrix, *bisPermuted, mol, gss, nThreads,
215 (DftBlockCallback)
216 (selected_func->is_gga() ? cbgga : cblda),
217 &ds);
218
219 pthread_mutex_lock(&dft_prop_mutex);
220 *xcEnergy +=ds.energy;
221 excmat.addSymmetrizedTo(xcm, aoMap, permutationHML);
222 pthread_mutex_unlock(&dft_prop_mutex);
223
224
225 if(nThreads<=1) {
226 do_output(LOG_CAT_INFO, LOG_AREA_DFT,
227 "Electrons: %11.7f %7.1g: xc energy %f (serial)",
228 (double)electrons,
229 (double)((electrons-nElectrons)/nElectrons),
230 (double)ds.energy);
231 tm.print(LOG_AREA_DFT, __func__);
232 }
233 return electrons;
234 }
235
236 struct XcData {
237 const XCEvaluatorRestricted* xcEvaluator;
238 int nElectrons;
239 symmMatrix* xcm;
240 real xcEnergy;
241 real el;
242 int nThreads;
243 };
244
245 static void*
xcWorker(void * data)246 xcWorker(void *data)
247 {
248 static const int XCWORKER_ERROR =0;
249 struct XcData *d = (XcData*)data;
250 try {
251 d->el = d->xcEvaluator->getXC(d->nElectrons, *d->xcm, &d->xcEnergy,
252 d->nThreads);
253 } catch(const char *s) {
254 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
255 "xcWorker thread caught an exception '%s'", s);
256 return (void*)&XCWORKER_ERROR;
257 } catch(const std::bad_alloc & e) {
258 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
259 "xcWorker thread caught an exception '%s'", e.what());
260 return (void*)&XCWORKER_ERROR;
261 } catch(const std::runtime_error & e) {
262 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
263 "xcWorker thread caught an exception '%s'", e.what());
264 return (void*)&XCWORKER_ERROR;
265 } catch(...) {
266 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
267 "xcWorker thread caught unexpected exception.");
268 return (void*)&XCWORKER_ERROR;
269 }
270 return NULL;
271 }
272
273 real
getXC_mt(const BasisInfoStruct & bis,const IntegralInfo & integralInfo,const Molecule & mol,const Dft::GridParams & gss,int nElectrons,const symmMatrix & dens,symmMatrix & xcm,real * xcEnergy,std::vector<int> const & permutationHML)274 getXC_mt(const BasisInfoStruct& bis, const IntegralInfo& integralInfo,
275 const Molecule& mol, const Dft::GridParams& gss, int nElectrons,
276 const symmMatrix& dens, symmMatrix& xcm, real* xcEnergy,
277 std::vector<int> const & permutationHML)
278 {
279 int i;
280 real electrons;
281 Util::TimeMeter tm;
282
283 int nThreads = dft_get_num_threads();
284 std::vector<XcData> data(nThreads);
285 std::vector<pthread_t> pids(nThreads);
286
287 XCEvaluatorRestricted xcEvaluator(bis, integralInfo, mol, gss,
288 permutationHML, dens);
289
290 if(nThreads == 1) {
291 /* Do not create any threads at all to avoid stack allocation. */
292 *xcEnergy = 0.0;
293 electrons = xcEvaluator.getXC(nElectrons, xcm, xcEnergy, 1);
294 } else {
295 for(i=0; i<nThreads; i++) {
296 data[i].xcEvaluator = &xcEvaluator;
297 data[i].nElectrons = nElectrons;
298 data[i].xcm = &xcm;
299 data[i].xcEnergy = 0.0;
300 data[i].nThreads = nThreads;
301 if(pthread_create(&pids[i], NULL, xcWorker, &data[i])) {
302 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
303 "Creation of thread # %d failed\n", i);
304 if (i==0)
305 throw "No worker threads could be started";
306 else
307 break;
308 }
309 }
310 *xcEnergy = 0;
311 electrons = 0;
312 while (--i >= 0) {
313 pthread_join(pids[i], NULL);
314 *xcEnergy += data[i].xcEnergy;
315 electrons += data[i].el;
316 }
317 }
318 if(nThreads>1) {
319 do_output(LOG_CAT_INFO, LOG_AREA_DFT,
320 "Electrons: %11.7f %7.1g: KS Energy %f (mt)",
321 (double)electrons,
322 (double)((electrons-nElectrons)/nElectrons),
323 (double)*xcEnergy);
324 tm.print(LOG_AREA_DFT, __func__);
325 }
326 return electrons;
327 }
328
329 real
getXC_seq(const BasisInfoStruct & bis,const IntegralInfo & integralInfo,const Molecule & mol,const Dft::GridParams & gss,int nElectrons,const symmMatrix & dens,symmMatrix & xcm,real * xcEnergy,std::vector<int> const & permutationHML)330 getXC_seq(const BasisInfoStruct& bis, const IntegralInfo& integralInfo,
331 const Molecule& mol, const Dft::GridParams& gss, int nElectrons,
332 const symmMatrix& dens, symmMatrix& xcm, real* xcEnergy,
333 std::vector<int> const & permutationHML)
334 {
335 XCEvaluatorRestricted xcEvr(bis, integralInfo, mol, gss,
336 permutationHML, dens);
337 printf("%p\n", (void*)(&xcEvr));
338 return xcEvr.getXC(nElectrons, xcm, xcEnergy, 1);
339 }
340
341 /* =================================================================== */
342 /* Unrestricted sparse code. */
343 class XCEvaluatorUnrestricted : public XCEvaluator {
344 SparseMatrix *dMat[2];
345 public:
XCEvaluatorUnrestricted(const BasisInfoStruct & bisOrig_,const IntegralInfo & integralInfo_,const Molecule & mol_,const Dft::GridParams & gss_,std::vector<int> const & permutationHML_,const symmMatrix & densA,const symmMatrix & densB)346 XCEvaluatorUnrestricted(const BasisInfoStruct& bisOrig_,
347 const IntegralInfo& integralInfo_,
348 const Molecule& mol_,
349 const Dft::GridParams& gss_,
350 std::vector<int> const & permutationHML_,
351 const symmMatrix& densA,
352 const symmMatrix& densB)
353 : XCEvaluator(bisOrig_, integralInfo_, mol_, gss_, permutationHML_, densA)
354 {
355 dMat[0] = new SparseMatrix(*pattern, densA, aoMap, permutationHML);
356 dMat[1] = new SparseMatrix(*pattern, densB, aoMap, permutationHML);
357 }
~XCEvaluatorUnrestricted()358 ~XCEvaluatorUnrestricted()
359 {
360 delete dMat[0];
361 delete dMat[1];
362 }
363
364 real getXC(int nElectrons, symmMatrix& xcA, symmMatrix& xcB,
365 real* xcEnergy, int nThreads) const;
366 };
367
368 real
getXC(int nElectrons,symmMatrix & xca,symmMatrix & xcb,real * xcEnergy,int nThreads) const369 XCEvaluatorUnrestricted::getXC(int nElectrons,
370 symmMatrix& xca, symmMatrix& xcb,
371 real* xcEnergy, int nThreads) const
372 {
373 real electrons;
374
375 Util::TimeMeter tm;
376 bool isGGA = selected_func->is_gga();
377
378 *xcEnergy = 0.0;
379 sync_threads(false, nThreads);
380 SparseMatrix mata(*pattern), matb(*pattern);
381 struct UksData<Dft::SparseMatrix> ds(&mata, &matb, DFT_MAX_BLLEN);
382
383 void (*cblda)(DftIntegratorBl* grid, real * restrict tmp,
384 int bllen, int blstart, int blend,
385 UksData<SparseMatrix>* data)
386 = xcCallbackLdaU<SparseMatrix,XCDistributorLda<SparseMatrix> >;
387 void (*cbgga)(DftIntegratorBl* grid, real * restrict tmp,
388 int bllen, int blstart, int blend,
389 UksData<SparseMatrix>* data)
390 = xcCallbackGgaU<SparseMatrix,XCDistributorGgaU<SparseMatrix> >;
391
392 electrons = integrate(2, dMat, *bisPermuted, mol, gss,
393 nThreads,
394 DftBlockCallback(isGGA ? cbgga : cblda),
395 &ds);
396
397 pthread_mutex_lock(&dft_prop_mutex);
398 *xcEnergy +=ds.energy;
399 pthread_mutex_unlock(&dft_prop_mutex);
400 mata.addSymmetrizedTo(xca, aoMap, permutationHML);
401 matb.addSymmetrizedTo(xcb, aoMap, permutationHML);
402 pthread_mutex_unlock(&dft_prop_mutex);
403
404 if(nThreads <= 1) {
405 do_output(LOG_CAT_INFO, LOG_AREA_DFT,
406 "Electrons: %11.7f %7.1g: U-xc energy %f (serial)",
407 (double)electrons,
408 (double)((electrons-nElectrons)/nElectrons),
409 (double)ds.energy);
410 tm.print(LOG_AREA_DFT, __func__);
411 }
412 return electrons;
413 }
414
415 /* multithreaded interface TBW... */
416
417 real
getUXC_seq(const BasisInfoStruct & bis,const IntegralInfo & integralInfo,const Molecule & mol,const Dft::GridParams & gss,int nElectrons,const symmMatrix & densA,const symmMatrix & densB,symmMatrix & xcA,symmMatrix & xcB,real * xcEnergy,std::vector<int> const & permutationHML)418 getUXC_seq(const BasisInfoStruct& bis, const IntegralInfo& integralInfo,
419 const Molecule& mol, const Dft::GridParams& gss, int nElectrons,
420 const symmMatrix& densA, const symmMatrix& densB,
421 symmMatrix& xcA, symmMatrix& xcB, real* xcEnergy,
422 std::vector<int> const & permutationHML)
423 {
424 XCEvaluatorUnrestricted xcEvaluator(bis, integralInfo, mol, gss,
425 permutationHML, densA, densB);
426 return xcEvaluator.getXC(nElectrons, xcA, xcB, xcEnergy, 1);
427 }
428
429
430 END_NAMESPACE(Dft)
431