1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /** @file rho-mat.cc Functions for density and gradient evaluation.
31 The density can be evaluated at entire batches of grid points.
32 */
33
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37
38 #include <string.h>
39 #include <stdio.h>
40
41 #include "realtype.h"
42 typedef ergo_real real;
43
44 #include "mat_gblas.h"
45 #include "rho-mat.h"
46
47 #if !defined(restrict)
48 #define restrict
49 #endif
50
51 /** helper function for zeroing only used blocks of orbitals. Selected
52 values for the first index are looped over, and all allowed values
53 of the second index.
54
55 @param tmp the matrix[nbast][nvclen]
56
57 @param nblocks pointer to an integer containing number of nonzero
58 orbital blocks.
59 @param iblocks a set of nblocks integer pairs [a,b) defining the
60 range of first index to be zeroed.
61 @param ldaib not used
62 @param nvclen batch length - and the second dimension of tmp.
63 */
64 static void
zeroorbs(real * tmp,const int * nblocks,const int (* iblocks)[2],int ldaib,int nvclen)65 zeroorbs(real *tmp, const int *nblocks, const int (*iblocks)[2],
66 int ldaib, int nvclen)
67 {
68 /* DIMENSION TMP(NVCLEN,NBAST),NBLOCKS(NSYM),IBLOCKS(2,LDAIB,NSYM) */
69 int ibl, idx, k;
70 for(ibl=0; ibl<nblocks[0]; ibl++)
71 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
72 real * tmpi = tmp + idx*nvclen;
73 for(k=0; k<nvclen; k++) tmpi[k] = 0.0;
74 }
75 }
76
77 /** Computes the expectation value <o|dmat|o'> for a symmetric matrix
78 and given set of precomputed orbital values gao. Sparsity of gao as
79 determined with help of nblocks and iblocks is used to reduce the
80 computational effort.
81 @param dmat full square symmetric matrix
82 @param nbast size of dmat
83 @param gao orbital matrix[nbast][nvclen]. Set values are determied
84 by nblocks and iblocks, other values shall not be accessed.
85 @param nblocks number of nonzero row blocks in gao
86 @param iblocks ranges [a,b) of nonzero blocks in gao.
87 @param ldaib not used
88 @param tmp temporary matrix [nbast][nvclen]
89 @param nvclen batch length - number of columns in gao.
90 @param rho the vector[nvclen] where the computed expectation values
91 will be stored.
92 */
93 void
getrho_blocked_lda(int nbast,const real * dmat,const real * restrict gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real * rho)94 getrho_blocked_lda(int nbast, const real * dmat, const real * restrict gao,
95 const int* nblocks, const int (*iblocks)[2],
96 int ldaib, real *tmp, int nvclen, real *rho)
97 {
98 /*
99 DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
100 DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
101 DIMENSION TMP(NVCLEN,NBAST)
102 */
103 int ibl, idx, jbl, k;
104
105 /* dzero(TMP,NVCLEN*NBAST) */
106 zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
107
108 /* only first symmetry */
109 #if USE_BLAS_IN_XC
110 for(ibl=0; ibl<nblocks[0]; ibl++) {
111 static const ergo_real ONER = 1.0;
112 static const ergo_real HALF = 0.5;
113 int cColumns, sumCols;
114 for(jbl=0; jbl<ibl; jbl++) {
115 cColumns = iblocks[jbl][1]-iblocks[jbl][0];
116 sumCols = iblocks[ibl][1]-iblocks[ibl][0];
117 mat::gemm("N", "N", &nvclen, &cColumns, &sumCols, &ONER,
118 gao+iblocks[ibl][0]*nvclen, &nvclen,
119 dmat+iblocks[jbl][0]*nbast + iblocks[ibl][0], &nbast, &ONER,
120 tmp + iblocks[jbl][0]*nvclen, &nvclen);
121 }
122 cColumns = iblocks[ibl][1]-iblocks[ibl][0];
123 sumCols = iblocks[ibl][1]-iblocks[ibl][0];
124 mat::symm("R", "U", &nvclen, &cColumns, &HALF,
125 dmat+iblocks[ibl][0]*nbast + iblocks[ibl][0], &nbast,
126 gao+iblocks[ibl][0]*nvclen, &nvclen, &ONER,
127 tmp + iblocks[ibl][0]*nvclen, &nvclen);
128 }
129 #else /* USE_BLAS_IN_XC */
130 int jdx;
131 real d;
132 for(ibl=0; ibl<nblocks[0]; ibl++)
133 /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
134 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
135 real * restrict tmpj;
136 const real * restrict gaoi = gao + idx*nvclen;
137 for(jbl=0; jbl<nblocks[0]; jbl++) {
138 int jtop = iblocks[jbl][1] > idx ? idx : iblocks[jbl][1];
139 for(jdx=iblocks[jbl][0]; jdx<jtop; jdx++) {
140 d = dmat[idx +jdx*nbast];
141 tmpj = tmp + jdx*nvclen;
142 for(k=0; k<nvclen; k++)
143 tmpj[k] += gaoi[k]*d;
144 }
145 }
146 tmpj = tmp + idx*nvclen;
147 d = dmat[idx +idx*nbast]*0.5;
148 for(k=0; k<nvclen; k++)
149 tmpj[k] += gaoi[k]*d;
150 }
151 #endif /* USE_BLAS_IN_XC */
152 memset(rho, 0, nvclen*sizeof(real));
153 for(ibl=0; ibl<nblocks[0]; ibl++)
154 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
155 const real * restrict gaoi = gao + idx*nvclen;
156 const real * restrict tmpi = tmp + idx*nvclen;
157 for(k=0; k<nvclen; k++)
158 rho[k] += gaoi[k]*tmpi[k]*2.0;
159 }
160 }
161
162 /** Computes the expectation value <o|dmat|o'> and its derivatives for
163 a symmetric matrix and given set of precomputed orbital values and
164 their cartesian derivatives gao. Sparsity of gao as determined with
165 help of nblocks and iblocks is used to reduce the computational
166 effort.
167
168 @param dmat full square symmetric matrix
169 @param nbast size of dmat
170
171 @param gao orbital matrix[4][nbast][nvclen]. First block [0][][]
172 contains orbital values. Subsequent blocks - orbital derivatives wrt
173 x,y, and z coordinates. Set values are determied by nblocks and
174 iblocks, other values shall not be accessed.
175
176 @param nblocks number of nonzero row blocks in gao
177 @param iblocks ranges [a,b) of nonzero blocks in gao.
178 @param ldaib not used
179 @param tmp temporary matrix [nbast][nvclen]
180 @param nvclen batch length - number of columns in gao.
181 @param rho the vector[nvclen] where the computed expectation values
182 will be stored.
183 @param grad a vector of triples where the computed gradient values
184 will be stored.
185 */
186 void
getrho_blocked_gga(int nbast,const real * dmat,const real * restrict gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real * rho,real (* grad)[3])187 getrho_blocked_gga(int nbast, const real * dmat, const real * restrict gao,
188 const int* nblocks, const int (*iblocks)[2],
189 int ldaib, real *tmp, int nvclen,
190 real *rho, real (*grad)[3])
191 {
192 /*
193 DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
194 DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
195 DIMENSION TMP(NVCLEN,NBAST)
196 */
197 int ibl, idx, jbl, k;
198
199 /* dzero(TMP,NVCLEN*NBAST) */
200 zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
201 /* only first symmetry */
202 #if USE_BLAS_IN_XC == 1
203 for(ibl=0; ibl<nblocks[0]; ibl++)
204 for(jbl=0; jbl<nblocks[0]; jbl++) {
205 static const ergo_real ONER = 1.0;
206 int cColumns = iblocks[jbl][1]-iblocks[jbl][0];
207 int sumCols = iblocks[ibl][1]-iblocks[ibl][0];
208 mat::gemm("N", "N", &nvclen, &cColumns, &sumCols, &ONER,
209 gao+iblocks[ibl][0]*nvclen, &nvclen,
210 dmat+iblocks[jbl][0]*nbast + iblocks[ibl][0],
211 &nbast, &ONER,
212 tmp + iblocks[jbl][0]*nvclen, &nvclen);
213 }
214 #else /* USE_BLAS_IN_XC */
215 int jdx;
216 real d;
217 for(ibl=0; ibl<nblocks[0]; ibl++)
218 /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
219 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
220 const real * restrict gaoi = gao + idx*nvclen;
221 for(jbl=0; jbl<nblocks[0]; jbl++) {
222 for(jdx=iblocks[jbl][0]; jdx<iblocks[jbl][1]; jdx++) {
223 real * restrict tmpj = tmp + jdx*nvclen;
224 d = dmat[idx +jdx*nbast];
225 for(k=0; k<nvclen; k++)
226 tmpj[k] += gaoi[k]*d;
227 }
228 }
229 }
230 #endif /* USE_BLAS_IN_XC */
231
232 memset(rho, 0, nvclen*sizeof(real));
233 memset(grad, 0, 3*nvclen*sizeof(real));
234 for(ibl=0; ibl<nblocks[0]; ibl++)
235 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
236 const real * restrict gaoi = gao + idx*nvclen;
237 const real * restrict tmpi = tmp + idx*nvclen;
238 for(k=0; k<nvclen; k++) {
239 rho[k] += gaoi[k]*tmpi[k];
240 grad[k][0] += gaoi[k + nvclen*nbast]*tmpi[k]*2;
241 grad[k][1] += gaoi[k + nvclen*nbast*2]*tmpi[k]*2;
242 grad[k][2] += gaoi[k + nvclen*nbast*3]*tmpi[k]*2;
243 }
244 }
245 }
246
247 /** Computes the expectation value <o|dmat|o'> for a nonsymmetric matrix
248 and given set of precomputed orbital values gao. Sparsity of gao as
249 determined with help of nblocks and iblocks is used to reduce the
250 computational effort.
251 @param dmat full square symmetric matrix
252 @param nbast size of dmat
253 @param gao orbital matrix[nbast][nvclen]. Set values are determied
254 by nblocks and iblocks, other values shall not be accessed.
255 @param nblocks number of nonzero row blocks in gao
256 @param iblocks ranges [a,b) of nonzero blocks in gao.
257 @param ldaib not used
258 @param tmp temporary matrix [nbast][nvclen]
259 @param nvclen batch length - number of columns in gao.
260 @param rho the vector[nvclen] where the computed expectation values
261 will be stored.
262 */
263 void
getexp_blocked_lda(int nbast,const real * dmat,const real * gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real * rho)264 getexp_blocked_lda(int nbast, const real * dmat, const real * gao,
265 const int* nblocks, const int (*iblocks)[2],
266 int ldaib, real *tmp, int nvclen, real *rho)
267 {
268 /*
269 DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
270 DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
271 DIMENSION TMP(NVCLEN,NBAST)
272 */
273 int ibl, idx, jbl, jdx, k;
274 real d;
275
276 /* dzero(TMP,NVCLEN*NBAST) */
277 zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
278
279 /* only first symmetry */
280 for(ibl=0; ibl<nblocks[0]; ibl++)
281 /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
282 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
283 const real * gaoi = gao + idx*nvclen;
284 for(jbl=0; jbl<nblocks[0]; jbl++) {
285 for(jdx=iblocks[jbl][0]; jdx<iblocks[jbl][1]; jdx++) {
286 real *tmpj = tmp + jdx*nvclen;
287 d = dmat[idx +jdx*nbast];
288 for(k=0; k<nvclen; k++)
289 tmpj[k] += gaoi[k]*d;
290 }
291 }
292 }
293
294 memset(rho, 0, nvclen*sizeof(real));
295 for(ibl=0; ibl<nblocks[0]; ibl++)
296 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
297 const real * gaoi = gao + idx*nvclen;
298 const real * tmpi = tmp + idx*nvclen;
299 for(k=0; k<nvclen; k++)
300 rho[k] += gaoi[k]*tmpi[k];
301 }
302 }
303
304 /** Computes the expectation value <o|dmat|o'> and its derivatives for
305 a nonsymmetric matrix and given set of precomputed orbital values
306 and their cartesian derivatives gao. Sparsity of gao as determined
307 with help of nblocks and iblocks is used to reduce the computational
308 effort.
309
310 @param dmat full square symmetric matrix
311 @param nbast size of dmat
312
313 @param gao orbital matrix[4][nbast][nvclen]. First block [0][][]
314 contains orbital values. Subsequent blocks - orbital derivatives wrt
315 x,y, and z coordinates. Set values are determied by nblocks and
316 iblocks, other values shall not be accessed.
317
318 @param nblocks number of nonzero row blocks in gao
319 @param iblocks ranges [a,b) of nonzero blocks in gao.
320 @param ldaib not used
321 @param tmp temporary matrix [nbast][nvclen]
322 @param nvclen batch length - number of columns in gao.
323 @param rgrad a vector of quartets where the computed expectation
324 values and gradient values will be stored. */
325 void
getexp_blocked_gga(int nbast,const real * dmat,const real * gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real (* rgrad)[4])326 getexp_blocked_gga(int nbast, const real * dmat, const real * gao,
327 const int* nblocks, const int (*iblocks)[2],
328 int ldaib, real *tmp, int nvclen,
329 real (*rgrad)[4])
330 {
331 /*
332 DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
333 DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
334 DIMENSION TMP(NVCLEN,NBAST)
335 */
336 int ibl, idx, jbl, jdx, k;
337 real d;
338
339 /* dzero(TMP,NVCLEN*NBAST) */
340 zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
341
342 /* only first symmetry */
343 for(ibl=0; ibl<nblocks[0]; ibl++)
344 /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
345 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
346 const real * gaoi = gao + idx*nvclen;
347 for(jbl=0; jbl<nblocks[0]; jbl++) {
348 for(jdx=iblocks[jbl][0]; jdx<iblocks[jbl][1]; jdx++) {
349 real *tmpj = tmp + jdx*nvclen;
350 d = dmat[idx +jdx*nbast] + dmat[jdx +idx*nbast];
351 for(k=0; k<nvclen; k++)
352 tmpj[k] += gaoi[k]*d;
353 }
354 }
355 }
356
357 memset(rgrad, 0, 4*nvclen*sizeof(real));
358 for(ibl=0; ibl<nblocks[0]; ibl++)
359 for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
360 const real * gaoi = gao + idx*nvclen;
361 const real * tmpi = tmp + idx*nvclen;
362 for(k=0; k<nvclen; k++) {
363 rgrad[k][0] += gaoi[k ]*tmpi[k]*0.5;
364 rgrad[k][1] += gaoi[k + nvclen*nbast ]*tmpi[k];
365 rgrad[k][2] += gaoi[k + nvclen*nbast*2]*tmpi[k];
366 rgrad[k][3] += gaoi[k + nvclen*nbast*3]*tmpi[k];
367 }
368 }
369 }
370