1 /*-*-mode:
2
3 !
4 ! Dalton, a molecular electronic structure program
5 ! Copyright (C) by the authors of Dalton.
6 !
7 ! This program is free software; you can redistribute it and/or
8 ! modify it under the terms of the GNU Lesser General Public
9 ! License version 2.1 as published by the Free Software Foundation.
10 !
11 ! This program is distributed in the hope that it will be useful,
12 ! but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ! Lesser General Public License for more details.
15 !
16 ! If a copy of the GNU LGPL v2.1 was not distributed with this
17 ! code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html.
18 !
19
20 !
21
22 */
23 /* define VAR_MPI if you want to use parallel grid generation
24 * for later parallel MPI calculation.
25 *
26 * define USE_PTHREADS if you want to use multithreaded grid
27 * generation. It allows to take an advantage of SMP
28 * architectures. It is currently implemented for BECKE, BECK2 and SSF
29 * type grids (i.e those that do the work in the preprocessing phase.
30 * USE_PTHREADS and VAR_MPI are do not conflict.
31 */
32 /* #define USE_PTHREADS */
33
34 #define __CVERSION__
35 #if !defined(SYS_DEC)
36 /* XOPEN compliance is missing on old Tru64 4.0E Alphas and pow() prototype
37 * is not specified. */
38 #define _XOPEN_SOURCE 500
39 #define _XOPEN_SOURCE_EXTENDED 1
40 #endif
41 #include <assert.h>
42 #include <limits.h>
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <math.h>
46 #include <string.h>
47 #include <strings.h>
48 #include <stdlib.h>
49 #include <time.h>
50 #include <sys/times.h>
51 #include <unistd.h>
52 #include "general.h"
53
54 #include "grid-gen.h"
55
56 /* common screening for cubes/cells */
57 static const real CELL_SIZE = 2.25;
58 /* the weight threshold:: a grid point will be ignored if has a weight
59 * lower than the WEIGHT_THRESHOLD. This should probably depend on the
60 * calculation's accuracy. */
61 static const real WEIGHT_THRESHOLD = 1e-20;
62
63 enum { GRID_TYPE_STANDARD, GRID_TYPE_CARTESIAN };
64
65 static integer gridType = GRID_TYPE_STANDARD;
66
67 void*
dal_malloc_(size_t sz,const char * place,unsigned line)68 dal_malloc_(size_t sz, const char *place, unsigned line)
69 {
70 void* res = malloc(sz);
71 if(!res) {
72 fprintf(stderr, "dal_malloc(sz=%u bytes) at %s (line %u) failed.\n",
73 (unsigned)sz, place, line);
74 exit(1);
75 }
76 return res;
77 }
78
79 /* ------------------------------------------------------------------- */
80 /* Internal data structures used by the grid generator. */
81 /* ------------------------------------------------------------------- */
82 struct GridGenAtomGrid_ {
83 integer uniq_no; /* number of the atom, before the symmetry multiplication */
84 integer Z; /* frequently used. */
85 integer* leb_ang; /* leb_gen index associated with each radial point */
86 real* rad; /* radial points array. */
87 real* wght; /* radial points weights */
88 integer pnt; /* number of radial points for this atom*/
89 };
90
91 struct GridGenMolGrid_ {
92 integer atom_cnt; /* number of atoms grid is generated for*/
93 const GridGenAtom* atom_coords;/* array with atom data */
94 GridGenAtomGrid** atom_grids;/* array of atom grids */
95 real* rij; /* triangular array of inverse distances between atoms */
96 real* aij; /* triangular array of of.... */
97 FILE* fl; /* the stream grid is being saved to */
98 integer total_points; /* total number of generated points */
99 integer off; /* thread number. 0 in serial. */
100 integer nt; /* total number of threads. 1 in serial. */
101 unsigned verbose:1; /* whether grid generation should be verbose or not */
102 };
103
104 /* work data - per thread */
105 struct GridGenWork_ {
106 real* rj; /* temp array for confocal elliptical coordinates */
107 real* p_kg; /* unormalized weights evaluated for given gridpoint/atom */
108 real* vec; /* temporary buffer */
109 real* x; /* x coord */
110 real* y; /* y coord */
111 real* z; /* z coord */
112 real* wg; /* weigths */
113 };
114 typedef struct GridGenWork_ GridGenWork;
115
116 /* atom grid init */
117 GridGenAtomGrid*
agrid_new(integer uniq_no,integer Z)118 agrid_new(integer uniq_no, integer Z)
119 {
120 GridGenAtomGrid* grid = dal_new(1,GridGenAtomGrid);
121 grid->uniq_no = uniq_no;
122 grid->Z = Z;
123 grid->pnt = 0;
124 grid->leb_ang = NULL;
125 grid->rad = NULL;
126 grid->wght = NULL;
127 return grid;
128 }
129
130 static void
agrid_set_radial(GridGenAtomGrid * grid,unsigned cnt)131 agrid_set_radial(GridGenAtomGrid* grid, unsigned cnt)
132 {
133 grid->pnt = cnt;
134 assert(grid->pnt>0);
135 if(grid->leb_ang) free(grid->leb_ang);
136 if(grid->rad) free(grid->rad);
137 if(grid->wght) free(grid->wght);
138 grid->leb_ang = calloc(grid->pnt, sizeof(real));
139 grid->rad = calloc(grid->pnt, sizeof(real));
140 grid->wght = calloc(grid->pnt, sizeof(real));
141 }
142
143
144 /* destroy atomic grid */
145 static void
agrid_free(GridGenAtomGrid * grid)146 agrid_free(GridGenAtomGrid* grid)
147 {
148 free(grid->leb_ang);
149 free(grid->rad);
150 free(grid->wght);
151 free(grid);
152 }
153
154 /* ===================================================================
155 * RADIAL QUADRATURES
156 * the quadrature has to fill in grid->pnt with number of points
157 * and set grid->rad.
158 * =================================================================== */
159
160 /* gc2_rad_cnt:
161 * determinates number of radial points to be used for
162 * Gauss-Chebyshev quadrature of second kind needed to integrate atom
163 * of specified Z number to specified threshold thrl.
164 * the ordinary weights are multiplied by r^2
165 */
166 static integer
gc2_rad_cnt(integer Z,real thrl)167 gc2_rad_cnt(integer Z, real thrl)
168 {
169 static const integer MIN_RAD_PT = 20;
170 integer ta, ri;
171 if(Z<=2) ta=0;
172 else if(Z<=10) ta=1;
173 else if(Z<=18) ta=2;
174 else if(Z<=36) ta=3;
175 else if(Z<=54) ta=4;
176 else if(Z<=86) ta=5;
177 else ta=6;
178
179 ri = rint( -5.0*(3*log10(thrl)-ta+8) );
180 return ri>MIN_RAD_PT ? ri : MIN_RAD_PT;
181 }
182
183 /* gc2_quad:
184 Gauss-Chebyshev quadrature of second kind that we use to generate
185 the radial grid. Requested number of points is in grid->pnt.
186 The grid->rad and grid->wght arrays are filled in.
187 */
188 static void
gen_gc2_quad(GridGenAtomGrid * grid,real thrl,void * quad_data)189 gen_gc2_quad(GridGenAtomGrid* grid, real thrl, void *quad_data)
190 {
191 /* constants */
192 static const real pi_2 = 2.0/M_PI;
193 static const real sfac = 2.0/3.0;
194 const real rfac = 1.0/log(2.0);
195 real n_one, n_pi, wfac;
196 /* variables */
197 real x = 0.0, angl = 0.0, w = 0.0;
198 integer i;
199
200 agrid_set_radial(grid, gc2_rad_cnt(grid->Z,thrl));
201 n_one = grid->pnt+1.0;
202 n_pi = M_PI/n_one;
203 wfac = 16.0/(3*n_one);
204 /* radial points */
205 for (i=0; i<grid->pnt; i++) {
206 real sinangl, sinangl2;
207 x = (grid->pnt-1-2*i)/n_one;
208 angl = n_pi*(i+1);
209 sinangl = sin(angl);
210 sinangl2 = sinangl*sinangl;
211 x += pi_2*(1.0+sfac*sinangl2)*cos(angl)*sinangl;
212 grid->rad[i] = rfac*log(2.0/(1-x));
213 w = wfac*sinangl2*sinangl2;
214 grid->wght[i] = w*rfac/(1.0-x)*grid->rad[i]*grid->rad[i];
215 /* transformation factor accumulated in weight */
216 }
217 }
218 /* gen_lmg_quad:
219 * As proposed by Roland Lindh, Per-Aake Malmqvist and Laura
220 * Gagliardi. */
221
222 struct lmg_data {
223 integer *nucorb;
224 real *aa;
225 integer maxl;
226 };
227 extern void FSYM2(get_maxl_nucind)(integer *maxl, integer*nucind);
228 extern void FSYM(nucbas)(integer*, real* , const integer*);
229 extern void FSYM(radlmg)(real*rad, real* wght, integer *nr, real* raderr,
230 const integer*maxrad, integer *nucorb,
231 real *aa, const integer *);
232
233 static void*
gen_lmg_init(void)234 gen_lmg_init(void)
235 {
236 integer nucind;
237 struct lmg_data *lmg = dal_new(1, struct lmg_data);
238
239 FSYM2(get_maxl_nucind)(&lmg->maxl, &nucind);
240 lmg->nucorb = malloc(2*lmg->maxl*nucind*sizeof(integer));
241 lmg->aa = calloc(4*lmg->maxl*nucind,sizeof(real));
242 if(!lmg->nucorb|| !lmg->aa) {
243 fprintf(stderr,"not enough memory. in gen_lmg_init.\n");
244 exit(1);
245 }
246 FSYM(nucbas)(lmg->nucorb, lmg->aa, &ONEI);
247 return lmg;
248 }
249
250 static void
gen_lmg_quad(GridGenAtomGrid * grid,real thrl,void * quad_data)251 gen_lmg_quad(GridGenAtomGrid* grid, real thrl, void *quad_data)
252 {
253 static const integer MAXRAD = 2000;
254 struct lmg_data *lmg = (struct lmg_data*)quad_data;
255 integer pc;
256
257 agrid_set_radial(grid, MAXRAD);
258 FSYM(radlmg)(grid->rad, grid->wght, &pc, &thrl, &MAXRAD,
259 lmg->nucorb+2*grid->uniq_no*lmg->maxl,
260 lmg->aa +4*grid->uniq_no*lmg->maxl,
261 &ZEROI);
262 grid->pnt = pc;
263
264 grid->rad = realloc(grid->rad, grid->pnt*sizeof(real));
265 grid->wght = realloc(grid->wght, grid->pnt*sizeof(real));
266 }
267
268 static void
gen_lmg_free(void * quad_data)269 gen_lmg_free(void *quad_data)
270 {
271 struct lmg_data *lmg = (struct lmg_data*)quad_data;
272
273 free(lmg->nucorb);
274 free(lmg->aa);
275 free(lmg);
276 }
277
278
279 struct radial_scheme_t {
280 char *name;
281 void *(*quad_init)(void);
282 void (*quad_gen) (GridGenAtomGrid*, real thrl, void *quad_data);
283 void (*quad_free) (void *quad_data);
284 };
285 static struct radial_scheme_t quad_lmg = {
286 "LMG scheme",
287 gen_lmg_init,
288 gen_lmg_quad,
289 gen_lmg_free
290 };
291 static struct radial_scheme_t quad_gc2 = {
292 "Gauss-Chebychev scheme of second kind",
293 NULL,
294 gen_gc2_quad,
295 NULL
296 };
297
298 static struct radial_scheme_t *radial_quad = &quad_lmg;
299
300 /* ------------------------------------------------------------------- */
301 /* The angular grid generation interface */
302 /* ------------------------------------------------------------------- */
303
304 /* routines for generation of Lebedev grid */
305
306 void ld0014_(real* x, real* y, real* z, real* w, integer *pnt);
307 void ld0026_(real* x, real* y, real* z, real* w, integer *pnt);
308 void ld0038_(real* x, real* y, real* z, real* w, integer *pnt);
309 void ld0050_(real* x, real* y, real* z, real* w, integer *pnt);
310 void ld0074_(real* x, real* y, real* z, real* w, integer *pnt);
311 void ld0086_(real* x, real* y, real* z, real* w, integer *pnt);
312 void ld0110_(real* x, real* y, real* z, real* w, integer *pnt);
313 void ld0146_(real* x, real* y, real* z, real* w, integer *pnt);
314 void ld0170_(real* x, real* y, real* z, real* w, integer *pnt);
315 void ld0194_(real* x, real* y, real* z, real* w, integer *pnt);
316 void ld0230_(real* x, real* y, real* z, real* w, integer *pnt);
317 void ld0266_(real* x, real* y, real* z, real* w, integer *pnt);
318 void ld0302_(real* x, real* y, real* z, real* w, integer *pnt);
319 void ld0350_(real* x, real* y, real* z, real* w, integer *pnt);
320 void ld0434_(real* x, real* y, real* z, real* w, integer *pnt);
321 void ld0590_(real* x, real* y, real* z, real* w, integer *pnt);
322 void ld0770_(real* x, real* y, real* z, real* w, integer *pnt);
323 void ld0974_(real* x, real* y, real* z, real* w, integer *pnt);
324 void ld1202_(real* x, real* y, real* z, real* w, integer *pnt);
325 void ld1454_(real* x, real* y, real* z, real* w, integer *pnt);
326
327 /* some of the point type values were guessed but since it was only used
328 for ANGMIN -> quadrature mapping type, the accuracy is not really relevant.
329 Anyone, who would rely on particular mapping here would be crazy.
330 The true values can be found in literature.
331 */
332
333 static struct leb_gen_{
334 integer point_cnt, poly_order;
335 void (*func)(real* x, real* y, real* z, real* w, integer *pnt);
336 } leb_gen[] = {
337 { 14, 5, ld0014_},
338 { 38, 9, ld0038_},
339 { 50, 11, ld0050_},
340 { 86, 15, ld0086_},
341 { 110, 17, ld0110_},
342 { 146, 19, ld0146_},
343 { 170, 21, ld0170_},
344 { 194, 23, ld0194_},
345 { 230, 25, ld0230_},
346 { 266, 27, ld0266_},
347 { 302, 29, ld0302_},
348 { 350, 31, ld0350_},
349 { 434, 35, ld0434_},
350 { 590, 41, ld0590_},
351 { 770, 47, ld0770_},
352 { 974, 53, ld0974_},
353 {1202, 59, ld1202_},
354 {1454, 64, ld1454_},
355 };
356
357 /* leb_get_from_point returns index to leb_gen array that points to
358 the entry having at least iang points.
359 */
360 static integer
leb_get_from_point(integer iang)361 leb_get_from_point(integer iang)
362 {
363 integer i;
364 for(i=ELEMENTS(leb_gen)-1; i>=0; i--)
365 if(iang>=leb_gen[i].point_cnt) return i;
366 return 0;
367 }
368
369 /* leb_get_from_order returns index to leb_gen array that points to
370 the entry contructing polyhedra of at least given order.
371 */
372 static integer
leb_get_from_order(integer poly_order)373 leb_get_from_order(integer poly_order)
374 {
375 integer i;
376 for(i=0; i<ELEMENTS(leb_gen); i++)
377 if(leb_gen[i].poly_order>=poly_order)
378 return i;
379 return 0;
380 }
381
382 /* Trond Saue:
383 The below data gives atomic radii in Angstroms and stems from table I of
384 J.C.Slater: "Atomic Radii in Crystals"
385 J.Chem.Phys. 41(1964) 3199-3204
386 Values for elements marked with an asterisk has been
387 guessed/interpolated
388 */
389 static const real bragg_radii[] = {
390 /* dummy */
391 0.75,
392 /* H He* */
393 0.35, 0.35,
394 /* Li Be B C N O F Ne* */
395 1.45, 1.05, 0.85, 0.70, 0.65, 0.60, 0.50, 0.45,
396 /* Na Mg Al Si P S Cl Ar* */
397 1.80, 1.50, 1.25, 1.10, 1.00, 1.00, 1.00, 1.00,
398 /* K Ca Sc Ti V Cr Mn Fe Co */
399 2.20, 1.80, 1.60, 1.40, 1.35, 1.40, 1.40, 1.40, 1.35,
400 /* Ni Cu Zn Ga Ge As Se Br Kr* */
401 1.35, 1.35, 1.35, 1.30, 1.25, 1.15, 1.15, 1.15, 1.10,
402 /* Rb Sr Y Zr Nb Mo Tc Ru Rh */
403 2.35, 2.00, 1.80, 1.55, 1.45, 1.45, 1.35, 1.30, 1.35,
404 /* Pd Ag Cd In Sn Sb Te I Xe* */
405 1.40, 1.60, 1.55, 1.55, 1.45, 1.45, 1.40, 1.40, 1.40,
406 /* Cs Ba La */
407 2.60, 2.15, 1.95,
408 /* Ce Pr Nd Pm Sm Eu Gd */
409 1.85, 1.85, 1.85, 1.85, 1.85, 1.85, 1.80,
410 /* Tb Dy Ho Er Tm Yb Lu */
411 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
412 /* Hf Ta W Re Os Ir Pt Au Hg */
413 1.55, 1.45, 1.35, 1.30, 1.30, 1.35, 1.35, 1.35, 1.50,
414 /* Tl Pb* Bi Po At* Rn* */
415 1.90, 1.75, 1.60, 1.90, 1.50, 1.50,
416 /* Fr* Ra Ac */
417 2.15, 2.15, 1.95,
418 /* rad(U): 1.75 --> 1.37D0 */
419 /* Th Pa U Np Pu Am Cm* */
420 1.80, 1.80, 1.37, 1.75, 1.75, 1.75, 1.75,
421 /* Bk* Cf* Es* Fm* Md* No* Lw* */
422 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75
423 };
424
425
426 /* ------------------------------------------------------------------- */
427 /* The grid generator configuration data with defaults. */
428 /* ------------------------------------------------------------------- */
429 /* include_selected_partitioning scales the atomic grid centered at
430 * atom no 'catom'.
431 */
432 /* max number of points per atom per angular shell */
433 #define MAX_PT_PER_SHELL (leb_gen[ELEMENTS(leb_gen)-1].point_cnt)
434
435 /* include_partitioning_weights:
436 scale weights
437 */
438 #define IDX(arr,i,j) arr[(i)*MAX_PT_PER_SHELL +(j)]
439 static void
gridgen_compute_rjs(GridGenWork * ggw,GridGenMolGrid * mg,integer point_cnt,integer idx)440 gridgen_compute_rjs(GridGenWork *ggw, GridGenMolGrid* mg,
441 integer point_cnt, integer idx)
442 {
443 integer atno, ptno;
444
445 for(atno=0; atno<mg->atom_cnt; atno++) {
446 for(ptno=0; ptno<point_cnt; ptno++) {
447 real dx = mg->atom_coords[atno].x - ggw->x[idx+ptno];
448 real dy = mg->atom_coords[atno].y - ggw->y[idx+ptno];
449 real dz = mg->atom_coords[atno].z - ggw->z[idx+ptno];
450
451 assert(atno>=0 && atno<mg->atom_cnt);
452 assert(ptno>=0 && ptno<MAX_PT_PER_SHELL);
453 ggw->IDX(rj,atno,ptno) = sqrt(dx*dx + dy*dy + dz*dz);
454 ggw->IDX(p_kg,atno,ptno) = 1.0;
455 }
456 }
457 }
458 /* include_partitioning_becke_corr:
459 multiply current sphere/shell generated for specified atom by space
460 partitioning weights as described by Becke.
461 The computed weights are somehow redundant: unnormalized
462 weights p_kg are computed for all atoms, althougth only one is
463 needed at this stage. The algorithm should probably be restructured
464 to avoid this.
465 */
466 #define HARDNESS1 11
467 static void
becke_orig_preprocess(GridGenMolGrid * mg,integer atom,integer point_cnt,GridGenWork * ggw,integer idx,integer verbose)468 becke_orig_preprocess(GridGenMolGrid* mg, integer atom, integer point_cnt,
469 GridGenWork *ggw, integer idx, integer verbose)
470 {
471 integer atno, atno2, ptno, h, isign=-1;
472 real mu, mu2, g_mu, apasc;
473 real xpasc[HARDNESS1], facult[HARDNESS1];
474 integer pc;
475 /* compute confocal ellipical coordinates for the batch of points to
476 * be processed. */
477
478 if(mg->atom_cnt==1)
479 return;
480 facult[0] = 1;
481 for(h=1; h<HARDNESS1; h++)
482 facult[h] = facult[h-1]*h;
483
484 for(h=HARDNESS1-1; h>=0; h--) {
485 isign = -isign;
486 xpasc[h] = isign*facult[HARDNESS1-1]/
487 ((2*h+1)*facult[h]*facult[HARDNESS1-1-h]);
488 }
489 xpasc[0] = 1;
490 apasc = 0;
491 for(h=0; h<HARDNESS1; h++) apasc += xpasc[h];
492 apasc = 0.5/apasc;
493
494 gridgen_compute_rjs(ggw, mg, point_cnt, idx);
495
496 if(verbose)printf("Computing cell functions for atom %d\n", atom);
497 for(atno=1; atno<mg->atom_cnt; atno++) {
498 for(atno2=0; atno2<atno; atno2++) {
499 real dist = mg->rij[atno2+(atno*(atno-1))/2];
500 for(ptno=0; ptno<point_cnt; ptno++) {
501 mu =(ggw->IDX(rj,atno,ptno)-ggw->IDX(rj,atno2,ptno))*dist;
502 mu2 = mu*mu;
503 g_mu = 0;
504 for(h=0; h<HARDNESS1; h++) {
505 g_mu += xpasc[h]*mu;
506 mu *= mu2;
507 }
508 ggw->IDX(p_kg,atno,ptno) *= 0.5-apasc*g_mu;
509 ggw->IDX(p_kg,atno2,ptno) *= 0.5+apasc*g_mu;
510 }
511 }
512 }
513 /* compute weight normalization factors */
514 pc = point_cnt; dzero_(ggw->vec, &pc);
515 for(atno=0; atno<mg->atom_cnt; atno++)
516 // radovan:
517 // this if {} is to exclude centers which carry no grid points
518 // these are point charges without basis sets
519 // they should not be included in the partitioning
520 if (mg->atom_grids[atno]->pnt > 0) {
521 for(ptno=0; ptno<point_cnt; ptno++)
522 ggw->vec[ptno] += ggw->IDX(p_kg,atno,ptno);
523 }
524
525 /*
526 * Apply the computed weights
527 */
528 for(ptno=0; ptno<point_cnt; ptno++)
529 ggw->wg[idx+ptno] *= ggw->IDX(p_kg,atom,ptno)/ggw->vec[ptno];
530 }
531
532 #define HARDNESS2 3
533 static void
becke_corr_preprocess(GridGenMolGrid * mg,integer atom,integer point_cnt,GridGenWork * ggw,integer idx,integer verbose)534 becke_corr_preprocess(GridGenMolGrid* mg, integer atom, integer point_cnt,
535 GridGenWork *ggw, integer idx, integer verbose)
536 {
537 integer atno, atno2, ptno, h;
538 real mu, g_mu;
539 integer pc;
540 /* compute confocal ellipical coordinates for the batch of points to
541 * be processed. */
542
543 if(mg->atom_cnt==1)
544 return;
545 gridgen_compute_rjs(ggw, mg, point_cnt, idx);
546
547 if(verbose)printf("Computing cell functions for atom %d\n", atom);
548 for(atno=1; atno<mg->atom_cnt; atno++) {
549 for(atno2=0; atno2<atno; atno2++) {
550 real dist = mg->rij[atno2+(atno*(atno-1))/2];
551 real bfac = mg->aij[atno2+(atno*(atno-1))/2];
552 for(ptno=0; ptno<point_cnt; ptno++) {
553 mu =(ggw->IDX(rj,atno,ptno)-ggw->IDX(rj,atno2,ptno))*dist;
554 mu += bfac*(1-mu*mu);
555 if(mu<-1) mu= -1; /* numerical error correction, needed? */
556 if(mu>1) mu= 1; /* numerical error correction, needed? */
557 for(g_mu = mu, h=0; h<HARDNESS2; h++)
558 g_mu = 0.5*g_mu*(3.0-g_mu*g_mu);
559 ggw->IDX(p_kg,atno,ptno) *= 0.5*(1.0-g_mu);
560 ggw->IDX(p_kg,atno2,ptno) *= 0.5*(1.0+g_mu);
561 }
562 }
563 }
564 /* compute weight normalization factors */
565 pc = point_cnt; dzero_(ggw->vec, &pc);
566 for(atno=0; atno<mg->atom_cnt; atno++)
567 for(ptno=0; ptno<point_cnt; ptno++)
568 ggw->vec[ptno] += ggw->IDX(p_kg,atno,ptno);
569
570 /*
571 * Apply the computed weights
572 */
573 for(ptno=0; ptno<point_cnt; ptno++)
574 ggw->wg[idx+ptno] *= ggw->IDX(p_kg,atom,ptno)/ggw->vec[ptno];
575 }
576
577 /* include_partitioning_ssf:
578 multiply current sphere/shell by space partitioning weights as
579 described in SSF article.
580 FIXME: compute relevant atoms only once.
581 */
582 static void
ssf_preprocess(GridGenMolGrid * mg,integer atom,integer point_cnt,GridGenWork * ggw,integer idx,integer verbose)583 ssf_preprocess(GridGenMolGrid* mg, integer atom, integer point_cnt,
584 GridGenWork *ggw, integer idx, integer verbose)
585 {
586 static const real SSF_CUTOFF = 0.64;
587 integer atno, ptno;
588 real mu, g_mu, pr, rx, ry, rz;
589 integer * relevant_atoms = dal_new(mg->atom_cnt, integer);
590 integer atomi=0, ati, ati2, rel_atom_cnt = 0;
591 integer pc;
592
593 rx = ggw->x[idx] - mg->atom_coords[atom].x;
594 ry = ggw->y[idx] - mg->atom_coords[atom].y;
595 rz = ggw->z[idx] - mg->atom_coords[atom].z;
596 pr = sqrt(rx*rx+ry*ry+rz*rz); /* radius of the sphere */
597
598 /* find relevant atoms for 'atom':
599 * separate loops for atoms below and above */
600
601 for(atno=0; atno<atom; atno++) {
602 real dist = mg->rij[atno+(atom*(atom-1))/2];
603 if(2*pr*dist>1-SSF_CUTOFF)
604 relevant_atoms[rel_atom_cnt++] = atno;
605 }
606 atomi = rel_atom_cnt;
607 relevant_atoms[rel_atom_cnt++] = atom;
608 for(atno=atom+1; atno<mg->atom_cnt; atno++) {
609 real dist = mg->rij[atom+(atno*(atno-1))/2];
610 if(2*pr*dist>1-SSF_CUTOFF)
611 relevant_atoms[rel_atom_cnt++] = atno;
612 }
613
614 if(rel_atom_cnt==1) {
615 free(relevant_atoms);
616 return;
617 }
618
619 /* compute confocal ellipical coordinates for the batch of points to
620 * be processed. */
621 for(ati=0; ati<rel_atom_cnt; ati++) {
622 atno = relevant_atoms[ati];
623 for(ptno=0; ptno<point_cnt; ptno++) {
624 real dx = mg->atom_coords[atno].x-ggw->x[ptno+idx];
625 real dy = mg->atom_coords[atno].y-ggw->y[ptno+idx];
626 real dz = mg->atom_coords[atno].z-ggw->z[ptno+idx];
627
628 assert(atno>=0 && atno<mg->atom_cnt);
629 assert(ptno>=0 && ptno<MAX_PT_PER_SHELL);
630 ggw->IDX(rj,ati,ptno) = sqrt(dx*dx + dy*dy + dz*dz);
631 ggw->IDX(p_kg,ati,ptno) = 1.0;
632 }
633 }
634
635 if(verbose)printf("Computing cell functions for atom %d\n", atom);
636 for(ati=1; ati<rel_atom_cnt; ati++) {
637 atno = relevant_atoms[ati];
638 for(ati2=0; ati2<ati; ati2++) {
639 integer atno2 = relevant_atoms[ati2];
640 real mu2;
641 real dist = mg->rij[atno2+(atno*(atno-1))/2];
642 for(ptno=0; ptno<point_cnt; ptno++) {
643 mu =(ggw->IDX(rj,ati,ptno)-ggw->IDX(rj,ati2,ptno))*dist;
644 if(mu<=-SSF_CUTOFF)
645 g_mu = -1;
646 else if(mu>=SSF_CUTOFF)
647 g_mu = 1;
648 else {
649 mu /= SSF_CUTOFF; mu2=mu*mu;
650 g_mu = 0.0625*mu*(35+mu2*(-35+mu2*(21-5*mu2)));
651 }
652 ggw->IDX(p_kg,ati,ptno) *= 0.5*(1.0-g_mu);
653 ggw->IDX(p_kg,ati2,ptno) *= 0.5*(1.0+g_mu);
654 }
655 }
656 }
657 /* compute weight normalization factors */
658 pc = point_cnt; dzero_(ggw->vec, &pc);
659 for(ati=0; ati<rel_atom_cnt; ati++)
660 for(ptno=0; ptno<point_cnt; ptno++)
661 ggw->vec[ptno] += ggw->IDX(p_kg,ati,ptno);
662
663 /*
664 * Apply the computed weights
665 */
666 for(ptno=0; ptno<point_cnt; ptno++)
667 ggw->wg[idx+ptno] *= ggw->IDX(p_kg,atomi,ptno)/ggw->vec[ptno];
668
669 free(relevant_atoms);
670 }
671
672 /** include_block_partitioning transforms a set of POINT_CNT points.
673 */
674 typedef struct {
675 real (*coor)[4];
676 real *rj;
677 real *p_kg;
678 real *vec;
679 integer LDA; /* leading dimension of rj and p_kg */
680 } GGBlockWork;
681
682 static void
block_work_init(GGBlockWork * ggw,real (* coorw)[4],integer atom_cnt,integer point_cnt)683 block_work_init(GGBlockWork* ggw, real (*coorw)[4],
684 integer atom_cnt, integer point_cnt)
685 {
686 ggw->coor = coorw;
687 ggw->LDA = point_cnt;
688 ggw->rj = dal_new(atom_cnt*point_cnt, real);
689 ggw->p_kg = dal_new(atom_cnt*point_cnt, real);
690 ggw->vec = dal_new(point_cnt, real);
691 }
692
693 static void
block_work_release(GGBlockWork * ggw)694 block_work_release(GGBlockWork* ggw)
695 {
696 free(ggw->rj);
697 free(ggw->p_kg);
698 free(ggw->vec);
699 }
700
701 static void
block_compute_rjs(GGBlockWork * ggw,GridGenMolGrid * mg,integer rel_at_cnt,integer * relevant_atoms,integer point_cnt)702 block_compute_rjs(GGBlockWork *ggw, GridGenMolGrid* mg,
703 integer rel_at_cnt, integer *relevant_atoms,
704 integer point_cnt)
705 {
706 integer ptno, i;
707
708 for(i=0; i<rel_at_cnt; i++) {
709 integer atno = relevant_atoms[i];
710 for(ptno=0; ptno<point_cnt; ptno++) {
711 real dx = mg->atom_coords[atno].x - ggw->coor[ptno][0];
712 real dy = mg->atom_coords[atno].y - ggw->coor[ptno][1];
713 real dz = mg->atom_coords[atno].z - ggw->coor[ptno][2];
714
715 ggw->rj [i*ggw->LDA+ptno] = sqrt(dx*dx + dy*dy + dz*dz);
716 ggw->p_kg[i*ggw->LDA+ptno] = 1.0;
717 }
718 }
719 }
720
721 static integer
block_postprocess(GridGenMolGrid * mg,real * center,integer point_cnt,const integer * atom_nums,real (* coorw)[4])722 block_postprocess(GridGenMolGrid *mg, real *center,
723 integer point_cnt, const integer *atom_nums,
724 real (*coorw)[4])
725 {
726 integer atno, ptno, h, isign=-1, i, j;
727 real mu, mu2, g_mu, apasc;
728 real xpasc[HARDNESS1], facult[HARDNESS1];
729 /* compute confocal ellipical coordinates for the batch of points to
730 * be processed. */
731 integer *relevant_atoms = dal_new(mg->atom_cnt, integer);
732 /* map2r: map from all to relevant_atoms array */
733 integer *map2r = dal_new(mg->atom_cnt, integer);
734 integer uniq_atoms;
735 GGBlockWork ggw;
736 integer dest;
737 integer pc;
738
739 /* we find first atoms that relevant for this cell.
740 * We do it by linear search which will scale as N^2
741 * but we can live with that for now.
742 */
743 uniq_atoms = 0;
744 for(i=0; i<mg->atom_cnt; i++) map2r[i] = -1;
745 for(atno=0; atno<mg->atom_cnt; atno++) {
746 GridGenAtomGrid *ag = mg->atom_grids[atno];
747 real dx = mg->atom_coords[atno].x - center[0];
748 real dy = mg->atom_coords[atno].y - center[1];
749 real dz = mg->atom_coords[atno].z - center[2];
750 real dist2 = dx*dx + dy*dy + dz*dz;
751 real r;
752 if(ag->pnt == 0)
753 continue;
754 assert(ag->pnt>=2);
755 assert(ag->rad[0]> ag->rad[1]);
756 /* Increase the radius beyond the last point to let the grid
757 * time to end. Adding a scaled-up difference between last
758 * two radial points is possibly the best way out. */
759 r = ag->rad[0] + CELL_SIZE;
760 if(r*r>dist2) {
761 map2r[atno] = uniq_atoms;
762 relevant_atoms[uniq_atoms++] = atno;
763 }
764 }
765 for(i=0; i<point_cnt; i++) {
766 integer atnoi = atom_nums[i];
767 if(map2r[atnoi] == -1) {
768 map2r[atnoi] = uniq_atoms;
769 relevant_atoms[uniq_atoms++] = atnoi;
770 fort_print("Internal safety check corrected for atom %d - "
771 "please report.", atnoi);
772 }
773 }
774 if(uniq_atoms<=1) { /* 0 cannot happen and 1 - no partitioning. */
775 free(relevant_atoms);
776 free(map2r);
777 return point_cnt;
778 }
779 block_work_init(&ggw, coorw, uniq_atoms, point_cnt);
780 block_compute_rjs(&ggw, mg, uniq_atoms, relevant_atoms,
781 point_cnt);
782
783 facult[0] = 1;
784 for(h=1; h<HARDNESS1; h++)
785 facult[h] = facult[h-1]*h;
786
787 for(h=HARDNESS1-1; h>=0; h--) {
788 isign = -isign;
789 xpasc[h] = isign*facult[HARDNESS1-1]/
790 ((2*h+1)*facult[h]*facult[HARDNESS1-1-h]);
791 }
792 xpasc[0] = 1;
793 apasc = 0;
794 for(h=0; h<HARDNESS1; h++) apasc += xpasc[h];
795 apasc = 0.5/apasc;
796
797 for(i=1; i<uniq_atoms; i++) {
798 integer atno = relevant_atoms[i];
799 for(j=0; j<i; j++) {
800 real dist, bfac;
801 integer atno2 = relevant_atoms[j];
802 dist = mg->rij[atno2+(atno*(atno-1))/2];
803 bfac = mg->aij[atno2+(atno*(atno-1))/2];
804 for(ptno=0; ptno<point_cnt; ptno++) {
805 mu =(ggw.rj[ggw.LDA*i+ptno]-
806 ggw.rj[ggw.LDA*j+ptno])*dist;
807 mu += bfac*(1-mu*mu);
808 mu2 = mu*mu;
809 g_mu = 0;
810 for(h=0; h<HARDNESS1; h++) {
811 g_mu += xpasc[h]*mu;
812 mu *= mu2;
813 }
814 ggw.p_kg[ggw.LDA*i+ptno] *= 0.5-apasc*g_mu;
815 ggw.p_kg[ggw.LDA*j+ptno] *= 0.5+apasc*g_mu;
816 }
817 }
818 }
819 /* compute weight normalization factors */
820 pc = point_cnt; dzero_(ggw.vec, &pc);
821 for(i=0; i<uniq_atoms; i++) {
822 for(ptno=0; ptno<point_cnt; ptno++)
823 ggw.vec[ptno] += ggw.p_kg[ggw.LDA*i+ptno];
824 }
825
826 /*
827 * Apply the computed weights removing at the same time
828 * points with low weight.
829 */
830 for(dest=0, ptno=0; ptno<point_cnt; ptno++) {
831 integer atom = map2r[atom_nums[ptno]];
832 real factor = ggw.p_kg[ggw.LDA*atom+ptno]/ggw.vec[ptno];
833 if(factor >= WEIGHT_THRESHOLD) {
834 coorw[dest][0] = coorw[ptno][0];
835 coorw[dest][1] = coorw[ptno][1];
836 coorw[dest][2] = coorw[ptno][2];
837 coorw[dest][3] = coorw[ptno][3]*factor;
838 dest++;
839 }
840 }
841 free(relevant_atoms);
842 free(map2r);
843 block_work_release(&ggw);
844 return dest;
845 }
846
847 /* Structure describing chosen partitioning scheme */
848 struct partitioning_scheme_t {
849 char *name;
850 void (*preprocess)
851 (GridGenMolGrid* mg, integer catom, integer point_cnt,
852 GridGenWork *ggw, integer idx, integer verbose);
853 integer (*postprocess)
854 (GridGenMolGrid* mg, real *center,
855 integer point_cnt, const integer *atom_nums,
856 real (*coor)[4]);
857 };
858 static struct partitioning_scheme_t partitioning_becke_corr =
859 { "Becke partitioning with atomic radius correction",
860 becke_corr_preprocess, NULL };
861 static struct partitioning_scheme_t partitioning_becke_orig =
862 { "Original Becke partitioning", becke_orig_preprocess };
863 static struct partitioning_scheme_t partitioning_ssf =
864 { "SSF partitioning", ssf_preprocess, NULL };
865 static struct partitioning_scheme_t partitioning_block =
866 { "Blocked partitioning for large molecules/parallel calc:s.",
867 NULL, block_postprocess };
868
869 static struct partitioning_scheme_t *selected_partitioning =
870 &partitioning_becke_orig;
871
872
873 /* =================================================================== */
874 /* linear grid generator */
875 /* =================================================================== */
876 /*the bucketing scheme. first shot: nlog n. Load up all grid points,
877 assign them [nx,ny,nz] tuples. sort the tuples to collect the ones
878 sharing same cube. Within each cube select one that is closest to
879 the center. Save cubes.
880 */
881 #if defined(SYS_DEC)
882 #if defined(CHAR_BIT)
883 #undef CHAR_BIT
884 #endif
885 /* other architectures define it properly on their own */
886 #define CHAR_BIT 8
887 #endif
888 #define KEY_BITS (sizeof(GridPointKey)*CHAR_BIT)
889
890 typedef unsigned GridPointKey;
891 struct point_key_t {
892 GridPointKey key; /* unique identificator of the box */
893 unsigned index;
894 };
895
896
897 /* assume currently constant amounts of bits per coordinate */
898 static int
comp_point_key(const void * a,const void * b)899 comp_point_key(const void* a, const void* b)
900 {
901 return ((struct point_key_t*)a)->key-((struct point_key_t*)b)->key;
902 }
903
904 void FSYM(gtexts)(real* r2);
905 void FSYM(getblocks)(real *center, real *CELLSZ, real RSHEL2[],
906 integer *NBLCNT, integer (*IBLCKS)[2]);
907 static void
boxify_load_grid(const char * fname,integer point_cnt,real * work,integer worksz,real (** coorw)[4],integer * x_allocated,integer * atom_idx)908 boxify_load_grid(const char *fname, integer point_cnt, real *work, integer worksz,
909 real (**coorw)[4], integer *x_allocated,
910 integer *atom_idx)
911 {
912 integer idx, cnt;
913 real *chunk;
914 FILE *f;
915
916 if(worksz<4*point_cnt*sizeof(real)) {
917 fprintf(stderr, "wrkmem too small (%d, needed %u) trying malloc.\n",
918 worksz, (unsigned)(4*point_cnt*sizeof(real)) );
919 *x_allocated = 1;
920 chunk = malloc(4*point_cnt*sizeof(real));
921 if(!chunk) {
922 fprintf(stderr, "loading grid into mem failed, too.\n"
923 "worksz=%d needed size=%u\n", worksz,
924 (unsigned) (4*point_cnt*sizeof(real)) );
925 dalton_quit("no mem in load_grid: 2");
926 }
927 } else chunk = work;
928 *coorw = (real(*)[4]) chunk;
929
930 f = fopen(fname,"rb");
931 if(!f) {
932 fprintf(stderr, "internal error, cannot open the grid file %s.\n",
933 fname);
934 exit(1);
935 }
936 idx = 0;
937 while(fread(&cnt, sizeof(cnt), 1, f)==1) {
938 integer aidx, i;
939 assert(cnt+idx<=point_cnt);
940 if(fread(&aidx, sizeof(integer), 1, f) != 1) dalton_quit("ERROR GRID1");
941 if(atom_idx)
942 for(i=0; i<cnt; i++) atom_idx[idx+i] = aidx;
943 if(fread(*coorw+idx, sizeof(real), 4*cnt, f) != 4*cnt)
944 dalton_quit("ERROR GRID1, cnt=%d", cnt);
945 idx += cnt;
946 }
947 fclose(f);
948 }
949
950 /* lo and hi point to three-element vectors */
951 static void
boxify_create_index(real cell_size,real (* coor)[4],integer point_cnt,struct point_key_t * index,real * lo,real * hi)952 boxify_create_index(real cell_size, real (*coor)[4], integer point_cnt,
953 struct point_key_t * index,
954 real *lo, real *hi)
955 {
956 real fac;
957 integer i, bpc = KEY_BITS/3; /* bits per coordinate */
958 lo[0] = lo[1] = lo[2] = +1e20;
959 hi[0] = hi[1] = hi[2] = -1e20;
960
961 for(i=0; i<point_cnt; i++) {
962 if(coor[i][0] < lo[0]) lo[0] = coor[i][0];
963 else if(coor[i][0] > hi[0]) hi[0] = coor[i][0];
964 if(coor[i][1] < lo[1]) lo[1] = coor[i][1];
965 else if(coor[i][1] > hi[1]) hi[1] = coor[i][1];
966 if(coor[i][2] < lo[2]) lo[2] = coor[i][2];
967 else if(coor[i][2] > hi[2]) hi[2] = coor[i][2];
968 }
969 if( (1<<bpc) < (hi[0] - lo[0])/cell_size) fputs("error: x\n", stderr);
970 if( (1<<bpc) < (hi[1] - lo[1])/cell_size) fputs("error: y\n", stderr);
971 if( (1<<bpc) < (hi[2] - lo[2])/cell_size) fputs("error: z\n", stderr);
972
973 fac = 1/cell_size;
974 for(i=0; i<point_cnt; i++) {
975 integer ix = (coor[i][0] - lo[0])*fac;
976 integer iy = (coor[i][1] - lo[1])*fac;
977 integer iz = (coor[i][2] - lo[2])*fac;
978 if(ix<0) ix = 0; /* correct numerical error, if any */
979 if(iy<0) iy = 0; /* correct numerical error, if any */
980 if(iz<0) iz = 0; /* correct numerical error, if any */
981 index[i].key = (ix<<(bpc*2)) | (iy<<bpc) | iz;
982 index[i].index = i;
983 }
984
985 qsort(index, point_cnt, sizeof(struct point_key_t), comp_point_key);
986 }
987
988 /* ===================================================================
989 * The parallelization section. Depending on the calculation mode, the
990 * code uses different initialization, save_batch and finalize
991 * actions. */
992 #if 0
993 static int
994 comp_weight(const void *a, const void *b)
995 { /* strangely, the sorting does not appear to help... */
996 return ((const real*)a)[3]-((const real*)b)[3];
997 }
998 #endif
999 static void
write_final_coords_and_weights(integer cnt,integer nblocks,integer (* shlblocks)[2],real * coorw,FILE * f)1000 write_final_coords_and_weights(integer cnt, integer nblocks, integer (*shlblocks)[2],
1001 real *coorw, FILE *f)
1002 {
1003 integer i;
1004 if(cnt <= 0) return;
1005 integer cnt_ll = cnt;
1006 if(fwrite(&cnt_ll, sizeof(cnt_ll), 1, f)!=1) {
1007 fprintf(stderr, "GRIDGEN: 'too short write' error.\n");
1008 dalton_quit("GRIDGEN: 'too short write' error.\n");
1009 }
1010 /* qsort(coorw, cnt, 4*sizeof(real), comp_weight); */
1011 if(fwrite(&nblocks, sizeof(nblocks), 1, f) != 1) abort();
1012 if(fwrite(shlblocks, sizeof(integer), nblocks*2, f) != nblocks*2)
1013 dalton_quit("write error in %s(), point 1", __FUNCTION__);
1014 for(i=0; i<cnt; i++)
1015 if(fwrite(coorw+i*4, sizeof(real), 3, f) != 3)
1016 dalton_quit("write error in %s(), coords", __FUNCTION__);
1017 for(i=0; i<cnt; i++)
1018 if(fwrite(coorw+i*4+3, sizeof(real), 1, f) != 1)
1019 dalton_quit("write error in %s(), weights", __FUNCTION__);
1020 }
1021
1022 static void
boxify_save_batch_local(GridGenMolGrid * mg,FILE * f,integer cnt,integer nblocks,integer (* shlblocks)[2],real * center,const integer * atom_nums,real * coorw)1023 boxify_save_batch_local(GridGenMolGrid *mg, FILE *f, integer cnt,
1024 integer nblocks, integer (*shlblocks)[2],
1025 real *center, const integer *atom_nums,
1026 real *coorw)
1027 {
1028 integer i;
1029 if(selected_partitioning->postprocess) {
1030 cnt = selected_partitioning->postprocess(mg, center, cnt, atom_nums,
1031 (real(*)[4])coorw);
1032 }
1033 if(cnt == 0) return;
1034 write_final_coords_and_weights(cnt, nblocks, shlblocks, coorw, f);
1035 }
1036
1037 #ifdef VAR_MPI
1038 #include <mpi.h>
1039 #include <our_extra_mpi.h>
1040 static integer last;
1041 static int mynum, nodes;
1042 static void
grid_par_init(real * radint,integer * angmin,integer * angint,integer * DFTGRID_DONE)1043 grid_par_init(real *radint, integer *angmin, integer *angint, integer *DFTGRID_DONE) {
1044 integer arr[4];
1045 /* Executed by master and all slaves */
1046 MPI_Comm_rank(MPI_COMM_WORLD, &mynum);
1047 MPI_Comm_size(MPI_COMM_WORLD, &nodes);
1048 last = 0;
1049 arr[0] = *angmin;
1050 arr[1] = *angint;
1051 if(selected_partitioning == &partitioning_becke_orig) arr[2] = 0;
1052 else if(selected_partitioning == &partitioning_becke_corr) arr[2] = 1;
1053 else if(selected_partitioning == &partitioning_ssf) arr[2] = 2;
1054 else /* if(selected_partitioning == &partitioning_block)*/ arr[2] = 3;
1055 arr[3] = *DFTGRID_DONE;
1056 MPI_Bcast(radint, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
1057 MPI_Bcast(arr, ELEMENTS(arr), fortran_MPI_INT, 0, MPI_COMM_WORLD);
1058 *angmin = arr[0];
1059 *angint = arr[1];
1060 /*
1061 printf("angmin %i angint %i for mynum %i",arr[0], arr[1], mynum);
1062 */
1063 switch(arr[2]) {
1064 case 0: selected_partitioning = &partitioning_becke_orig; break;
1065 case 1: selected_partitioning = &partitioning_becke_corr; break;
1066 case 2: selected_partitioning = &partitioning_ssf; break;
1067 default: selected_partitioning = &partitioning_block; break;
1068 }
1069 *DFTGRID_DONE = arr[3];
1070 }
1071
1072 static char*
grid_get_fname(const char * base,integer filenum)1073 grid_get_fname(const char *base, integer filenum)
1074 {
1075 if(filenum == 0)
1076 /*return strdup(base);*/
1077 return StringDuplicate(base);
1078 else {
1079 char *res = malloc(strlen(base) + 15);
1080 sprintf(res, "%s.%05d", base, filenum);
1081 return res;
1082 }
1083 }
1084
1085 #define M(s) {if((s) != MPI_SUCCESS) printf("MPI comm failed at %d\n", __LINE__); }
1086 static void
boxify_save_batch(GridGenMolGrid * mg,FILE * f,integer cnt,integer nbl,integer shlbl[][2],real * center,integer * atom_nums,real * coorw)1087 boxify_save_batch(GridGenMolGrid *mg, FILE *f, integer cnt,
1088 integer nbl, integer shlbl[][2],
1089 real *center, integer *atom_nums, real *coorw)
1090 {
1091 /* PARBLLEN must be low multiplicity of dftcom:MAXBLLEN
1092 * for performance reasons. */
1093 const integer PARBLLEN = 1000;
1094 /* Executed by master */
1095 integer i;
1096 for(i=0; i<cnt; i+= PARBLLEN) {
1097 integer bcnt = i+PARBLLEN<cnt ? PARBLLEN : cnt - i;
1098 last = (last+1) % nodes; /* which node should save this batch? */
1099 if(last == 0) /* master to do this work */
1100 boxify_save_batch_local(mg, f, bcnt, nbl, shlbl,
1101 center, atom_nums+i, coorw + i*4);
1102 else { /* send batch to node no. last */
1103 integer arr[2]; arr[0] = bcnt; arr[1] = nbl;
1104 M(MPI_Send(arr, 2, fortran_MPI_INT, last, 1, MPI_COMM_WORLD));
1105 M(MPI_Send(shlbl, 2*nbl, fortran_MPI_INT, last, 2, MPI_COMM_WORLD));
1106 M(MPI_Send(coorw+i*4, 4*bcnt, MPI_DOUBLE, last, 3, MPI_COMM_WORLD));
1107 M(MPI_Send(center, 3, MPI_DOUBLE, last, 4, MPI_COMM_WORLD));
1108 M(MPI_Send(atom_nums+i,bcnt, fortran_MPI_INT, last, 5, MPI_COMM_WORLD));
1109 }
1110 }
1111 }
1112
1113 #else
1114 /* not VAR_MPI */
1115 #define boxify_save_batch(mg,f,c,b,s,center,an,r) \
1116 boxify_save_batch_local((mg),(f),(c),(b),(s),(center),(an),(r))
1117 //#define grid_get_fname(base,num) strdup(base)
1118 #define grid_get_fname(base,num) StringDuplicate(base)
1119
1120 #endif
1121
1122 /* save_final saves the grid, possibly distributing it over to many
1123 * nodes. NOTE: atom_idx is used only for partitioning schemes that do
1124 * postprocessing and should not be accessed without prior checking.
1125 **/
1126 static integer
boxify_save(GridGenMolGrid * mg,const char * fname,integer point_cnt,real (* coorw)[4],const integer * atom_idx,struct point_key_t * keys,real * lo,real cell_size)1127 boxify_save(GridGenMolGrid *mg, const char *fname, integer point_cnt,
1128 real (*coorw)[4], const integer *atom_idx,
1129 struct point_key_t *keys, real *lo, real cell_size)
1130 {
1131 FILE *f;
1132 integer idx, cnt;
1133 real *rshel2;
1134 integer nblocks;
1135 integer (*shlblocks)[2] = malloc(2*FSYM2(ishell_cnt)()*sizeof(integer));
1136 integer bpc = KEY_BITS/3; /* bits per coordinate */
1137 GridPointKey mask = ~((-1)<<bpc);
1138 integer points_saved = 0;
1139 real *dt = NULL;
1140 integer dt_sz = 0, *atom_nums = NULL;
1141
1142 if((f = fopen(fname,"wb")) == NULL) {
1143 fprintf(stderr,"internal error, cannot save sorted grid file.\n");
1144 exit(1);
1145 }
1146 rshel2 = dal_new(FSYM2(ishell_cnt)(), real);
1147 FSYM(gtexts)(rshel2);
1148 for(idx=0; idx<point_cnt; idx += cnt) {
1149 integer i;
1150 real center[3];
1151 real mindist = 4*cell_size, maxdist = 0;
1152 GridPointKey key = keys[idx].key;
1153 real sx = 0, sy = 0, sz = 0;
1154 center[0] = lo[0] + (((key >> (bpc*2)) & mask)+0.5)*cell_size;
1155 center[1] = lo[1] + (((key >> bpc) & mask)+0.5)*cell_size;
1156 center[2] = lo[2] + (((key) & mask)+0.5)*cell_size;
1157 for(cnt=0; idx+cnt<point_cnt &&
1158 keys[idx+cnt].key == key; cnt++) {
1159 real dx = coorw[keys[idx+cnt].index][0]-center[0];
1160 real dy = coorw[keys[idx+cnt].index][1]-center[1];
1161 real dz = coorw[keys[idx+cnt].index][2]-center[2];
1162 real dist2 = dx*dx + dy*dy + dz*dz;
1163 if(dist2<mindist) { mindist=dist2; }
1164 if(dist2>maxdist) { maxdist=dist2; }
1165 sx += dx; sy += dy; sz += dz;
1166 }
1167
1168 /* merge data in one block */
1169 if(dt_sz<cnt) {
1170 dt_sz = cnt;
1171 dt = realloc(dt, 4*dt_sz*sizeof(real));
1172 atom_nums = realloc(atom_nums, dt_sz*sizeof(integer));
1173 }
1174 for(i=0; i<cnt; i++) {
1175 integer j = keys[idx+i].index;
1176 dt[i*4+0] = coorw[j][0];
1177 dt[i*4+1] = coorw[j][1];
1178 dt[i*4+2] = coorw[j][2];
1179 dt[i*4+3] = coorw[j][3];
1180 if(atom_idx) atom_nums[i] = atom_idx[j];
1181 }
1182 FSYM(getblocks)(center, &cell_size, rshel2,
1183 &nblocks, shlblocks);
1184 if(nblocks==0) continue;
1185
1186 if(0) {
1187 integer nblocks_int = nblocks;
1188 printf("box [%5.1f,%5.1f,%5.1f] nt=%5d "
1189 "%f <r<%f [%5.1f,%5.1f,%5.1f] %d\n",
1190 center[0], center[1], center[2], cnt,
1191 sqrt(mindist), sqrt(maxdist),
1192 sx/cnt, sy/cnt, sz/cnt, nblocks_int);
1193 for(i=0; i<nblocks; i++)
1194 #ifdef VAR_INT64
1195 printf("(%lld,%lld)",shlblocks[i][0], shlblocks[i][1]);
1196 #else
1197 printf("(%d,%d)",shlblocks[i][0], shlblocks[i][1]);
1198 #endif
1199 puts("");
1200 }
1201
1202 boxify_save_batch(mg, f, cnt, nblocks, shlblocks,
1203 center, atom_nums, dt);
1204 points_saved += cnt;
1205 }
1206 fclose(f);
1207 free(rshel2);
1208 free(shlblocks);
1209 if(dt) free(dt);
1210 if(atom_nums) free(atom_nums);
1211 if(point_cnt != points_saved)
1212 fort_print("Postprocessing compression: from %d to %d",
1213 point_cnt, points_saved);
1214 return points_saved;
1215 }
1216
1217 static void
ggen_work_init(GridGenWork * ggw,GridGenMolGrid * mg,integer mxshells)1218 ggen_work_init(GridGenWork* ggw, GridGenMolGrid* mg, integer mxshells)
1219 {
1220 ggw->rj = dal_new(mg->atom_cnt*MAX_PT_PER_SHELL, real);
1221 ggw->p_kg = dal_new(mg->atom_cnt*MAX_PT_PER_SHELL, real);
1222 ggw->vec = dal_new(mg->atom_cnt*MAX_PT_PER_SHELL, real);
1223 ggw->x = dal_new(MAX_PT_PER_SHELL*mxshells, real);
1224 ggw->y = dal_new(MAX_PT_PER_SHELL*mxshells, real);
1225 ggw->z = dal_new(MAX_PT_PER_SHELL*mxshells, real);
1226 ggw->wg = dal_new(MAX_PT_PER_SHELL*mxshells, real);
1227 }
1228
1229 static void
ggen_work_release(GridGenWork * ggw)1230 ggen_work_release(GridGenWork* ggw)
1231 {
1232 free(ggw->rj);
1233 free(ggw->p_kg);
1234 free(ggw->vec);
1235 free(ggw->x);
1236 free(ggw->y);
1237 free(ggw->z);
1238 free(ggw->wg);
1239 }
1240
1241 static GridGenMolGrid*
mgrid_new(integer atom_cnt,const GridGenAtom * atoms)1242 mgrid_new(integer atom_cnt, const GridGenAtom* atoms)
1243 {
1244 integer i, j, index, paircnt;
1245 GridGenMolGrid* mg = dal_new(1, GridGenMolGrid);
1246 mg->atom_cnt = atom_cnt;
1247 mg->atom_coords = atoms;
1248 mg->atom_grids = dal_new(atom_cnt, GridGenAtomGrid*);
1249 /* be careful with atoms (=no pairs): some systems (AIX) do not
1250 * like 0 allocations */
1251 paircnt = (atom_cnt*(atom_cnt-1))/2;
1252 mg->rij = dal_new((paircnt == 0 ? 1 : paircnt), real);
1253 mg->aij = dal_new((paircnt == 0 ? 1 : paircnt), real);
1254 index =0;
1255 for(i=0; i<atom_cnt; i++) {
1256 mg->atom_grids[i] = agrid_new(atoms[i].icent, atoms[i].Z);
1257 for(j=0; j<i; j++) {
1258 real chi = bragg_radii[atoms[i].Z]/bragg_radii[atoms[j].Z];
1259 real temp = (chi-1)/(chi+1);
1260 real dx = atoms[i].x - atoms[j].x;
1261 real dy = atoms[i].y - atoms[j].y;
1262 real dz = atoms[i].z - atoms[j].z;
1263 temp = temp/(temp*temp-1);
1264 if(temp>0.5) temp = 0.5;
1265 else if(temp<-0.5) temp = -0.5;
1266 mg->rij[index] = 1.0/sqrt(dx*dx + dy*dy + dz*dz);
1267 mg->aij[index++] = temp;
1268 }
1269 }
1270 mg->verbose = 0;
1271 return mg;
1272 }
1273
1274 static void
mgrid_free(GridGenMolGrid * mg)1275 mgrid_free(GridGenMolGrid* mg)
1276 {
1277 integer i;
1278 free(mg->rij);
1279 free(mg->aij);
1280 for(i=0; i< mg->atom_cnt; i++)
1281 agrid_free(mg->atom_grids[i]);
1282 free(mg->atom_grids);
1283 free(mg);
1284 }
1285
1286 /* mgrid_set_radial:
1287 precompute radial grid for all the atoms in the molecule.
1288 */
1289 static void
mgrid_set_radial(GridGenMolGrid * grd,real thrl)1290 mgrid_set_radial(GridGenMolGrid* grd, real thrl)
1291 {
1292 integer atom;
1293 void *data = NULL;
1294 if(radial_quad->quad_init)
1295 data = radial_quad->quad_init();
1296 for(atom=0; atom<grd->atom_cnt; atom++) {
1297 radial_quad->quad_gen(grd->atom_grids[atom], thrl, data);
1298 }
1299 if(radial_quad->quad_free)
1300 radial_quad->quad_free(data);
1301 }
1302
1303
1304 /* set_ang_fixed:
1305 computes the angular points for a set of radial points
1306 obtained from radial integration scheme.
1307 The only thing altered is grid->leb_ang vector.
1308 */
1309 static void
mgrid_set_angular_fixed(GridGenMolGrid * mgrid,real thrl,integer minang,integer maxang)1310 mgrid_set_angular_fixed(GridGenMolGrid* mgrid, real thrl,
1311 integer minang, integer maxang)
1312 {
1313 static const real BOHR = 0.529177249;
1314 integer atom, i = 0;
1315
1316 for(atom=0; atom<mgrid->atom_cnt; atom++) {
1317 integer current_ang = maxang;
1318 GridGenAtomGrid* grid = mgrid->atom_grids[atom];
1319 real rbragg = bragg_radii[grid->Z]/(5.0*BOHR);
1320
1321 for (i=0; i<grid->pnt; i++) {
1322 if(grid->rad[i]<rbragg) {
1323 /* prune */
1324 integer iang =
1325 (double)leb_gen[maxang].point_cnt*grid->rad[i]/rbragg;
1326 current_ang = leb_get_from_point(iang);
1327 if(current_ang<minang) current_ang = minang;
1328 } /* else current_ang = maxang; */
1329 grid->leb_ang[i] = current_ang;
1330 }
1331 }
1332 }
1333
1334 #if defined(USE_PTHREADS)
1335 #include <pthread.h>
1336 pthread_mutex_t grid_mutex = PTHREAD_MUTEX_INITIALIZER;
1337 static void
mgrid_lock(GridGenMolGrid * mgrid)1338 mgrid_lock(GridGenMolGrid* mgrid)
1339 {
1340 pthread_mutex_lock(&grid_mutex);
1341 }
1342 static void
mgrid_unlock(GridGenMolGrid * mgrid)1343 mgrid_unlock(GridGenMolGrid* mgrid)
1344 {
1345 pthread_mutex_unlock(&grid_mutex);
1346 }
1347 static integer
mgrid_get_num_threads(void)1348 mgrid_get_num_threads(void)
1349 { return 4; }
1350 #else
1351 #define mgrid_lock(mgrid)
1352 #define mgrid_unlock(mgrid)
1353 #endif
1354
1355 static integer
compress_grid(integer point_cnt,real * x,real * y,real * z,real * wg)1356 compress_grid(integer point_cnt, real* x, real* y, real *z, real* wg)
1357 {
1358 integer i, dest=0;
1359
1360 for(i=0; i<point_cnt; i++) {
1361 x [dest] = x [i];
1362 y [dest] = y [i];
1363 z [dest] = z [i];
1364 wg[dest] = wg[i];
1365 if(fabs(wg[i]) >WEIGHT_THRESHOLD)
1366 dest++;
1367 }
1368 return dest;
1369 }
1370
1371
1372 static void*
mgrid_compute_coords_worker(GridGenMolGrid * mgrid)1373 mgrid_compute_coords_worker(GridGenMolGrid* mgrid)
1374 {
1375 integer atom, ptno, j, idx, cnt;
1376 integer tpt;
1377 integer off = mgrid->off, done;
1378 GridGenWork ggw;
1379
1380 mgrid_unlock(mgrid); /* off has been copied, can unlock */
1381
1382 j=0;
1383 for(atom=off; atom<mgrid->atom_cnt; atom+=mgrid->nt) {
1384 if(mgrid->atom_grids[atom]->pnt>j)
1385 j = mgrid->atom_grids[atom]->pnt;
1386 }
1387 if(j==0) return NULL; /* this thread has got nothing to do */
1388 ggen_work_init(&ggw, mgrid, j);
1389
1390 for(atom=off, done=0; atom<mgrid->atom_cnt;
1391 atom+=mgrid->atom_coords[atom].mult, done++) {
1392 GridGenAtomGrid* grid = mgrid->atom_grids[atom];
1393 integer mult = mgrid->atom_coords[atom].mult;
1394 if(done % mgrid->nt != 0) /* do every mt symmetry-independent atom */
1395 continue;
1396 idx = 0;
1397 for (ptno=0; ptno<grid->pnt; ptno++) {
1398 real fact = 4*M_PI*grid->wght[ptno]*mult;
1399 real rad = grid->rad[ptno];
1400 integer ind = grid->leb_ang[ptno];
1401 leb_gen[ind].func(ggw.x+idx, ggw.y+idx, ggw.z+idx, ggw.wg+idx,
1402 &tpt);
1403
1404 for(j=idx; j<idx+leb_gen[ind].point_cnt; j++) {
1405 ggw.x[j] = ggw.x[j]*rad + mgrid->atom_coords[atom].x;
1406 ggw.y[j] = ggw.y[j]*rad + mgrid->atom_coords[atom].y;
1407 ggw.z[j] = ggw.z[j]*rad + mgrid->atom_coords[atom].z;
1408 ggw.wg[j] *= fact;
1409 }
1410 if(selected_partitioning->preprocess)
1411 selected_partitioning->preprocess
1412 (mgrid, atom, leb_gen[ind].point_cnt, &ggw, idx, 0);
1413 idx += leb_gen[ind].point_cnt;
1414 }
1415 /* degeneracy multiplication here */
1416 /* cnt = compress_grid(idx, ggw.x, ggw.y, ggw.z, ggw.wg); */
1417 cnt = idx;
1418 if(mgrid->verbose)
1419 fort_print(" DFT grid generation - Atom: %4d*%d points=%6d compressed from%6d (%3d radial)",
1420 atom+1, mult, cnt, idx, grid->pnt);
1421 if(cnt>0) {
1422 integer i;
1423 mgrid_lock(mgrid);
1424 fwrite(&cnt, sizeof(integer), 1, mgrid->fl);
1425 fwrite(&atom, sizeof(integer), 1, mgrid->fl);
1426 for(i=0; i<cnt; i++) {
1427 fwrite(ggw.x+i, sizeof(real), 1, mgrid->fl);
1428 fwrite(ggw.y+i, sizeof(real), 1, mgrid->fl);
1429 fwrite(ggw.z+i, sizeof(real), 1, mgrid->fl);
1430 fwrite(ggw.wg+i,sizeof(real), 1, mgrid->fl);
1431 }
1432 mgrid->total_points += cnt;
1433 mgrid_unlock(mgrid);
1434 }
1435 }
1436 ggen_work_release(&ggw);
1437 return NULL;
1438 }
1439
1440 static integer
mgrid_compute_coords(GridGenMolGrid * mgrid,const char * filename)1441 mgrid_compute_coords(GridGenMolGrid* mgrid, const char* filename)
1442 {
1443 if( (mgrid->fl = fopen(filename,"wb"))==NULL) {
1444 fprintf(stderr,"ERROR: Cannot open grid file '%s' for writing.\n",
1445 filename);
1446 return 0;
1447 }
1448 mgrid->total_points = 0;
1449 #if defined(USE_PTHREADS)
1450 mgrid->nt = mol_grid_get_num_threads();
1451 { integer i;
1452 pthread_t *ptid = dal_new(mgrid->nt, pthread_t);
1453 for(i=0; i<mgrid->nt; i++) {
1454 mol_grid_lock(mgrid);
1455 mgrid->off = i;
1456 pthread_create(&ptid[i], NULL,
1457 (void *(*)(void *))mgrid_compute_coords_worker, mgrid);
1458 }
1459 for(i=0; i<mgrid->nt; i++)
1460 pthread_join(ptid[i], NULL);
1461 }
1462 #else
1463 mgrid->nt = 1; mgrid->off = 0;
1464 mgrid_compute_coords_worker(mgrid);
1465 #endif
1466 fclose(mgrid->fl);
1467
1468 return mgrid->total_points;
1469 }
1470
1471
1472 static integer
mgrid_boxify(GridGenMolGrid * mg,const char * fname,integer point_cnt,real cell_size,void * work,integer worksz)1473 mgrid_boxify(GridGenMolGrid *mg, const char* fname,
1474 integer point_cnt, real cell_size,
1475 void* work, integer worksz)
1476 {
1477 real (*coorw)[4];
1478 real lo[3], hi[3];
1479 struct point_key_t * keys =
1480 dal_new(point_cnt, struct point_key_t);
1481 integer coorw_allocated = 0;
1482 integer *atom_idx;
1483 integer new_point_cnt;
1484
1485 atom_idx = selected_partitioning->postprocess ?
1486 dal_new(point_cnt, integer) : NULL;
1487
1488 boxify_load_grid(fname, point_cnt, work, worksz,
1489 &coorw, &coorw_allocated, atom_idx);
1490 boxify_create_index(cell_size, coorw, point_cnt, keys, lo, hi);
1491 new_point_cnt = boxify_save(mg, fname, point_cnt, coorw, atom_idx,
1492 keys, lo, cell_size);
1493 free(keys);
1494 if(coorw_allocated) free(coorw);
1495 if(atom_idx) free(atom_idx);
1496 return new_point_cnt;
1497 }
1498
1499 /* grid_generate:
1500 returns number of grid points.
1501 */
1502 integer
grid_generate(const char * filename,integer atom_cnt,const GridGenAtom * atom_arr,real threshold,GridGeneratingFunc generating_function,void * arg,integer minang,integer maxang,real * work,integer * lwork,integer verbose)1503 grid_generate(const char* filename, integer atom_cnt,
1504 const GridGenAtom* atom_arr, real threshold,
1505 GridGeneratingFunc generating_function, void* arg,
1506 integer minang, integer maxang, real* work, integer *lwork, integer verbose)
1507 {
1508 integer res;
1509 struct tms starttm, endtm; clock_t utm;
1510 GridGenMolGrid* mgrid = mgrid_new(atom_cnt, atom_arr);
1511 if(verbose) {
1512 fort_print(" DFT grid generation - Radial Quadrature : %s", radial_quad->name);
1513 fort_print(" DFT grid generation - Partitioning : %s", selected_partitioning->name);
1514 fort_print(" DFT grid generation - Radial integration threshold: %g", threshold);
1515 fort_print(" DFT grid generation - Angular polynomials in range [%i %i]", minang, maxang);
1516 }
1517
1518 times(&starttm);
1519 mgrid->verbose = 0;
1520 if(verbose>0) mgrid->verbose = verbose-1;
1521
1522 mgrid_set_radial(mgrid, threshold);
1523
1524 mgrid_set_angular_fixed(mgrid, threshold, leb_get_from_order(minang),
1525 leb_get_from_order(maxang));
1526
1527 res = mgrid_compute_coords(mgrid, filename);
1528
1529 res = mgrid_boxify(mgrid, filename, res, CELL_SIZE,
1530 work, *lwork*sizeof(real));
1531 mgrid_free(mgrid);
1532
1533 times(&endtm);
1534 utm = endtm.tms_utime-starttm.tms_utime;
1535
1536 if(verbose)
1537 fort_print(" DFT grid generation - Number of grid points: %8d; grid generation time:%9.1f s \n",
1538 res, utm/(double)sysconf(_SC_CLK_TCK));
1539 return res;
1540 }
1541
1542
1543 /* ------------------------------------------------------------------- */
1544 /* the dft grid input routines. */
1545 /* choose different types of grid: */
1546 /* The syntax is: */
1547 /* ((GC2|LMG) | (SSF|BECKE|BECKEORIG)) */
1548 /* Sets */
1549 /* radial_quad */
1550 /* selected_partitioning */
1551 /* ------------------------------------------------------------------- */
1552 static integer
get_word(const char * line,integer st,integer max_len)1553 get_word(const char *line, integer st, integer max_len)
1554 {
1555 integer res;
1556 for(res=st; res<max_len && isalnum(line[res]); res++)
1557 ;
1558 if(res>=max_len) res = 0;
1559 return res;
1560 }
1561
1562 static integer
skip_spaces(const char * line,integer st,integer max_len)1563 skip_spaces(const char *line, integer st, integer max_len)
1564 {
1565 integer res;
1566 for(res=st; res<max_len && isspace(line[res]); res++)
1567 ;
1568 return res;
1569 }
1570
1571 /* ------------------------------------------------------------------- */
1572 /* the integrator/grid generator itself */
1573 /* ------------------------------------------------------------------- */
1574 void
grid_gen_set_part_scheme(GridGenPartScheme scheme)1575 grid_gen_set_part_scheme(GridGenPartScheme scheme)
1576 {
1577 switch(scheme) {
1578 case GRID_PART_BECKE_CORR:
1579 selected_partitioning = &partitioning_becke_corr;
1580 break;
1581 case GRID_PART_BECKE_ORIG:
1582 selected_partitioning = &partitioning_becke_orig;
1583 break;
1584 case GRID_PART_SSF:
1585 selected_partitioning = &partitioning_ssf;
1586 break;
1587 case GRID_PART_BLOCK:
1588 selected_partitioning = &partitioning_block;
1589 break;
1590 }
1591 }
1592
1593 /* grid_gen_set_rad_quad:
1594 set radial quadrature.
1595 */
1596 void
grid_gen_set_rad_quad(GridGenQuad scheme)1597 grid_gen_set_rad_quad(GridGenQuad scheme)
1598 {
1599 switch(scheme) {
1600 default:
1601 case GRID_RAD_GC2: radial_quad = &quad_gc2; break;
1602 case GRID_RAD_LMG: radial_quad = &quad_lmg; break;
1603 }
1604 }
1605
1606
1607 void
FSYM(dftgridinput)1608 FSYM(dftgridinput)(const char *line, integer line_len)
1609 {
1610 static const char* keywords[] = {
1611 "GC2","LMG", "SSF","BECKE","BECKEORIG","BLOCK","CARTESIAN"
1612 };
1613 integer st, en, i;
1614 integer start = skip_spaces(line, 0, line_len);
1615 for(st=start; (en=get_word(line, st, line_len)) != 0;
1616 st=skip_spaces(line, en, line_len)) {
1617 for(i=0; i<ELEMENTS(keywords); i++) {
1618 if(strncasecmp(keywords[i], line+st, en-st)==0)
1619 break;
1620 }
1621 switch(i) {
1622 case 0: radial_quad = &quad_gc2; break;
1623 case 1: radial_quad = &quad_lmg; break;
1624 case 2: selected_partitioning=&partitioning_ssf;break;
1625 case 3:
1626 selected_partitioning = &partitioning_becke_corr; break;
1627 case 4:
1628 selected_partitioning = &partitioning_becke_orig; break;
1629 case 5:
1630 selected_partitioning = &partitioning_block; break;
1631 case 6:
1632 gridType = GRID_TYPE_CARTESIAN; break;
1633 default: fort_print("GRIDGEN: Unknown .GRID TYPE option ignored.\n%s",
1634 line);
1635 /* FIXME: should I quit here? */
1636 }
1637 }
1638 }
1639
1640 /* ===================================================================
1641 GENERAL GRID MANIPULATION ROUTINES.
1642 =================================================================== */
1643 /* generate a list of atoms from Dalton common block data, taking into
1644 account symmetries in the molecule.
1645 The list of atoms is accessed in two modes:
1646 - grid is generated only for symmetry-independent atoms...
1647 - ... but all atoms need to be taken into account when performing
1648 the space partitioning.
1649 */
1650 extern void FSYM2(get_no_atoms)(integer* atom_cnt);
1651 extern void FSYM2(get_atom_by_icent)(const integer* icent, real* charge,
1652 integer *cnt, integer *mult,
1653 real *x, real *y, real *z);
1654
1655 static GridGenAtom*
grid_gen_atom_new(integer * atom_cnt)1656 grid_gen_atom_new(integer* atom_cnt)
1657 {
1658 integer icent, nat, nat_total;
1659 integer cnt, mult;
1660 integer i;
1661 real x[8], y[8], z[8], charge;
1662 GridGenAtom* atoms;
1663
1664 // gets total number of centers
1665 FSYM2(get_no_atoms)(atom_cnt);
1666 // find out which need grids
1667 // charge is ABS(IZATOM(N)), skip zero (floating orbital) and 1234567890 (point charge)
1668 icent = 0;
1669 nat = 0;
1670 nat_total = 0;
1671 do {
1672 icent++;
1673 FSYM2(get_atom_by_icent)(&icent, &charge, &cnt, &mult, x, y, z);
1674 for(i=0; i<cnt; i++) {
1675 if (charge == 0 || charge > 12345){
1676 nat_total++;
1677 continue;
1678 }
1679 nat++;
1680 nat_total++;
1681 }
1682 } while(nat_total<*atom_cnt);
1683
1684 //allocate and fill atoms
1685 atoms = dal_new(nat, GridGenAtom);
1686 icent = 0;
1687 nat = 0;
1688 nat_total = 0;
1689 do {
1690 icent++;
1691 FSYM2(get_atom_by_icent)(&icent, &charge, &cnt, &mult, x, y, z);
1692 for(i=0; i<cnt; i++) {
1693 if (charge == 0 || charge > 12345){
1694 nat_total++;
1695 continue;
1696 }
1697 atoms[nat].x = x[i];
1698 atoms[nat].y = y[i];
1699 atoms[nat].z = z[i];
1700 atoms[nat].icent = icent-1;
1701 atoms[nat].Z = charge;
1702 atoms[nat].mult = mult;
1703 nat++;
1704 nat_total++;
1705 }
1706 } while(nat_total<*atom_cnt);
1707 *atom_cnt = nat;
1708 return atoms;
1709 }
1710
1711 /* =================================================================== */
1712 /* Grid I/O routines.
1713 *
1714 * Used by integrator to fetch data. The routines are located here
1715 * since the grid file format is an internal implementation issue of
1716 * the grid generator: it may contain not only grid positions and
1717 * weights but also other metadata.
1718 *
1719 * Two routines are provided. One is designed for blocked approach,
1720 * the other one is a plain point-by-point grid reader.
1721 */
1722
1723 #ifdef VAR_MPI
1724 static void
grid_par_shutdown(void)1725 grid_par_shutdown(void)
1726 {
1727 integer i;
1728 integer arr[2]; arr[0] = 0; arr[1] = 0;
1729 /* Executed by master only */
1730 for(i=1; i<nodes; i++)
1731 M(MPI_Send(arr, 2, fortran_MPI_INT, i, 1, MPI_COMM_WORLD));
1732 }
1733 static void
grid_par_slave(const char * fname,real threshold)1734 grid_par_slave(const char *fname, real threshold)
1735 {
1736 integer (*shlblocks)[2];
1737 MPI_Status s;
1738 char * nm = grid_get_fname(fname, mynum);
1739 FILE *f;
1740 integer ishlcnt = FSYM2(ishell_cnt)();
1741 real *dt = NULL;
1742 integer dt_sz = 0;
1743 integer atom_cnt, point_cnt=0;
1744 GridGenAtom* atoms = grid_gen_atom_new(&atom_cnt);
1745 GridGenMolGrid* mg = mgrid_new(atom_cnt, atoms);
1746 integer *atom_nums = NULL;
1747
1748 if((f=fopen(nm, "wb")) == NULL) dalton_quit("Slave could not save.");
1749 mgrid_set_radial(mg, threshold); /* so that it knows the atom radii */
1750
1751 shlblocks = malloc(2*ishlcnt*sizeof(integer));
1752 do {
1753 integer arr[2], lda;
1754 integer arr_integer;
1755 real center[3];
1756 M(MPI_Recv(arr, 2, fortran_MPI_INT, 0, 1, MPI_COMM_WORLD, &s));
1757 if(arr[0] == 0)break; /* End Of Job */
1758
1759 M(MPI_Recv(shlblocks, 2*arr[1], fortran_MPI_INT, 0, 2, MPI_COMM_WORLD, &s));
1760 if(arr[0]>dt_sz) {
1761 dt = realloc(dt, 4*arr[0]*sizeof(real));
1762 atom_nums = realloc(atom_nums, arr[0]*sizeof(integer));
1763 dt_sz = arr[0];
1764 }
1765 M(MPI_Recv(dt, 4*arr[0], MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, &s));
1766 M(MPI_Recv(center, 3, MPI_DOUBLE, 0, 4, MPI_COMM_WORLD, &s));
1767 M(MPI_Recv(atom_nums,arr[0], fortran_MPI_INT, 0, 5, MPI_COMM_WORLD, &s));
1768 if(selected_partitioning->postprocess)
1769 arr[0] = selected_partitioning->postprocess
1770 (mg, center, arr[0], atom_nums, (real(*)[4])dt);
1771 arr_integer = arr [1];
1772 write_final_coords_and_weights(arr[0], arr_integer, shlblocks, dt, f);
1773 point_cnt += arr[0];
1774 } while(1);
1775 fclose(f);
1776 if(dt)
1777 free(dt);
1778 if(atom_nums)
1779 free(atom_nums);
1780 free(nm);
1781 free(shlblocks);
1782 mgrid_free(mg);
1783 free(atoms);
1784 }
1785 #endif /* VAR_MPI */
1786 /* private definition */
1787 struct DftGridReader_ {
1788 FILE *f;
1789 };
1790 void FSYM2(get_grid_paras)(integer *DFTGRID_DONE, real *radint, integer *angmin, integer *angint);
1791 void FSYM2(set_grid_done)(void);
1792
1793 DftGridReader*
grid_open(integer nbast,real * dmat,real * work,integer * lwork,integer verbose)1794 grid_open(integer nbast, real *dmat, real *work, integer *lwork, integer verbose)
1795 {
1796 DftGridReader *res = dal_new(1, DftGridReader);
1797 integer DFTGRID_DONE, angmin, angint;
1798 real radint;
1799 char *fname;
1800
1801 switch(gridType) {
1802 case GRID_TYPE_STANDARD:
1803 FSYM2(get_grid_paras)(&DFTGRID_DONE, &radint, &angmin, &angint);
1804 #ifdef VAR_MPI
1805 grid_par_init(&radint, &angmin, &angint, &DFTGRID_DONE);
1806 #endif
1807 if(!DFTGRID_DONE) {
1808 integer atom_cnt;
1809 integer lwrk = *lwork - nbast;
1810 GridGenAtom* atoms = grid_gen_atom_new(&atom_cnt);
1811 struct RhoEvalData dt; /* = { grid, work, lwork, dmat, dmgao}; */
1812 dt.grid = NULL; dt.work = work+nbast; dt.lwork = &lwrk;
1813 dt.dmat = NULL; dt.dmagao = work;
1814
1815 #ifdef VAR_MPI
1816 if(mynum == 0) {
1817 grid_generate("DALTON.cQUAD", atom_cnt, atoms,
1818 radint, NULL, &dt,
1819 angmin, angint, work, lwork, verbose);
1820 grid_par_shutdown();
1821 } else
1822 grid_par_slave("DALTON.cQUAD", radint);
1823 /* Stop on barrier here so that we know all nodes managed to save
1824 * their files. */
1825 MPI_Barrier(MPI_COMM_WORLD);
1826 #else
1827 grid_generate("DALTON.cQUAD", atom_cnt, atoms,
1828 radint, NULL, &dt,
1829 angmin, angint, work, lwork, verbose);
1830 #endif
1831 free(atoms);
1832 FSYM2(set_grid_done)();
1833 }
1834 fname = grid_get_fname("DALTON.cQUAD", mynum);
1835 res->f=fopen(fname, "rb");
1836 free(fname);
1837 if(res == NULL) {
1838 perror("DFT quadrature grid file DALTON.cQUAD not found.");
1839 free(res);
1840 abort();
1841 }
1842 return res;
1843 case GRID_TYPE_CARTESIAN:
1844 #ifdef VAR_MPI
1845 perror("Error: cartesian grid not implemented for MPI.\n");
1846 free(res);
1847 abort();
1848 #endif
1849 if(dmat == NULL)
1850 {
1851 perror("Error: cartesian grid requested without dmat.\n");
1852 perror("(cartesian grid not implemented "
1853 "for open shell).\n");
1854 free(res);
1855 abort();
1856 }
1857 do_cartesian_grid(nbast, dmat, res);
1858 if( (res->f=fopen("DALTON.cQUAD", "rb")) == NULL) {
1859 perror("DFT quadrature grid file DALTON.cQUAD not found.");
1860 free(res);
1861 abort();
1862 }
1863 return res;
1864 default:
1865 perror("Error in grid_open: unknown grid type\n");
1866 free(res);
1867 abort();
1868 } /* END SWITCH */
1869 return NULL; /* to keep some compilers quiet */
1870 }
1871
1872
1873 DftGridReader*
grid_open_cmo(integer nbast,const real * cmo,real * work,integer * lwork,integer iprint)1874 grid_open_cmo(integer nbast, const real *cmo, real *work, integer *lwork, integer iprint)
1875 {
1876 real *dmat = dal_new(nbast*nbast, real);
1877 DftGridReader *reader;
1878 FSYM2(dft_get_ao_dens_mat)(cmo, dmat, work, lwork);
1879 reader = grid_open(nbast, dmat, work, lwork, iprint);
1880 free(dmat);
1881 return reader;
1882 }
1883
1884
1885 /** grid_getchunk_blocked() reads grid data also with screening
1886 information if only nblocks and shlblocks are provided.
1887 */
1888 integer
grid_getchunk_blocked(DftGridReader * rawgrid,integer maxlen,integer * nblocks,integer (* shlblocks)[2],real (* coor)[3],real * weight)1889 grid_getchunk_blocked(DftGridReader* rawgrid, integer maxlen,
1890 integer *nblocks, integer (*shlblocks)[2],
1891 real (*coor)[3], real *weight)
1892 {
1893 integer sz = 0, rc, bl_cnt;
1894 FILE *f = rawgrid->f;
1895
1896 if(fread(&sz, sizeof(integer), 1, f) <1)
1897 return -1; /* end of file */
1898 if(sz>maxlen) {
1899 #ifdef VAR_INT64
1900 fprintf(stderr,
1901 "grid_getchunk: too long vector length in file: %lld > %lld\n"
1902 "Calculation will stop.\n", sz, maxlen);
1903 dalton_quit("grid_getchunk: too long vector length in file: %lld > %lld\n"
1904 "Calculation will stop.\n", sz, maxlen);
1905 #else
1906 fprintf(stderr,
1907 "grid_getchunk: too long vector length in file: %d > %d\n"
1908 "Calculation will stop.\n", sz, maxlen);
1909 dalton_quit("grid_getchunk: too long vector length in file: %d > %d\n"
1910 "Calculation will stop.\n", sz, maxlen);
1911 #endif
1912 return -1; /* stop this! */
1913 }
1914
1915 if(fread(&bl_cnt, sizeof(integer), 1, f) <1) {
1916 puts("OCNT reading error."); return -1;
1917 }
1918 if(nblocks) *nblocks = bl_cnt;
1919
1920 if(shlblocks) {
1921 rc = fread(shlblocks, sizeof(integer), bl_cnt*2, f);
1922 } else {
1923 integer cnt;
1924 integer buf;
1925 rc = 0;
1926 for(cnt=0; cnt<bl_cnt*2; cnt+=rc) {
1927 rc=fread(&buf, sizeof(integer), 1, f);
1928 if( rc < 1)
1929 break;
1930 }
1931 }
1932 if(rc<1) {
1933 #ifdef VAR_INT64
1934 fprintf(stderr,
1935 "IBLOCKS reading error: failed to read %lld blocks.\n",
1936 *nblocks);
1937 #else
1938 fprintf(stderr,
1939 "IBLOCKS reading error: failed to read %d blocks.\n",
1940 *nblocks);
1941 #endif
1942 return -1;
1943 }
1944
1945 if(fread(coor, sizeof(real), 3*sz, f) < sz) { puts("XYZ");return -1;}
1946 if(fread(weight, sizeof(real), sz, f) < sz) { puts("W");return -1;}
1947 return sz;
1948 }
1949
1950
1951 void
grid_close(DftGridReader * rawgrid)1952 grid_close(DftGridReader *rawgrid)
1953 {
1954 fclose(rawgrid->f);
1955 free(rawgrid);
1956 }
1957
1958 /* ------------------------------------------------------------------- */
1959 /* FORTRAN INTERFACE */
1960 /* ------------------------------------------------------------------- */
1961 static DftGridReader *grid = NULL;
1962 void
FSYM(opnqua)1963 FSYM(opnqua)(const integer *nbast, real *dmat, real *work, integer *lwork, const integer *iprint)
1964 {
1965 grid = grid_open(*nbast, dmat, work, lwork, *iprint);
1966 }
1967
1968 void
FSYM(clsqua)1969 FSYM(clsqua)(void)
1970 {
1971 grid_close(grid);
1972 grid = NULL;
1973 }
1974
1975 void
FSYM(reaqua)1976 FSYM(reaqua)(integer *nshell, integer (*shell_blocks)[2],
1977 const integer *buf_sz,
1978 real (*coor)[3], real *weight, integer *nlen)
1979 {
1980 *nlen = grid_getchunk_blocked(grid, *buf_sz, nshell, shell_blocks,
1981 coor, weight);
1982 }
1983
1984 /* ------------------------------------------------------------------- */
1985 /* self test routines of the integrator/grid generator */
1986 /* ------------------------------------------------------------------- */
1987
1988 #if defined(GRID_GEN_TEST) && GRID_GEN_TEST == 1
1989 static const TestMolecule* test_molecule = NULL;
1990 typedef struct TestMolecule_ TestMolecule;
1991 struct TestMolecule_ {
1992 const GridGenAtom* atoms;
1993 const real* overlaps;
1994 integer atom_cnt;
1995 };
1996
1997
1998 const static GridGenAtom hydrogen1_atoms[] = {
1999 { 0.0, 0.0, 0.0, 1} /* single H atom at (0,0,1) */
2000 };
2001 const static real hydrogen1_overlaps[] = { 1 };
2002
2003 const static GridGenAtom hydrogen2_atoms[] = {
2004 { 0.0, 0.0, 0.0, 1}, /* single H atom at (0,0,1) */
2005 { 0.0, 0.0, 1.0, 1} /* single H atom at (0,0,2) */
2006 };
2007
2008 const static real hydrogen2_overlaps[] = {
2009 0.53726234893, 0.53726234893
2010 };
2011 const static TestMolecule test_molecules[] = {
2012 { hydrogen1_atoms, hydrogen1_overlaps, ELEMENTS(hydrogen1_atoms) },
2013 { hydrogen2_atoms, hydrogen2_overlaps, ELEMENTS(hydrogen2_atoms) }
2014 };
2015
2016 /* ------------------------------------------------------------------- */
2017 /* ------------------------------------------------------------------- */
2018 /* Test functions needed for testing the integrator/grid generator */
2019 /* ------------------------------------------------------------------- */
2020
2021 /* wave function evaluator;
2022 * does not include overlaps, this should be fixed. */
2023 static void
get_rho_grad(real x,real y,real z,real * rho,real * grad)2024 get_rho_grad(real x, real y, real z, real* rho, real* grad)
2025 {
2026 const static real COEF=1.99990037505132565043;
2027 const static real ALPHA=0.62341234;
2028 real val = 0, gradx=0, grady=0, gradz=0;
2029 integer i;
2030 val = 0;
2031 for(i=0; i<test_molecule->atom_cnt; i++) {
2032 real dx = x-test_molecule->atoms[i].x,
2033 dy = y-test_molecule->atoms[i].y,
2034 dz = z-test_molecule->atoms[i].z;
2035 real r2 = dx*dx+dy*dy+dz*dz;
2036 real ao = test_molecule->overlaps[i]*exp(-ALPHA*r2);
2037 val += ao;
2038 if(grad) {
2039 real fact = 4*test_molecule->overlaps[i]*ALPHA*exp(-ALPHA*r2)/COEF;
2040 gradx -= dx*fact;
2041 grady -= dy*fact;
2042 gradz -= dz*fact;
2043 }
2044 }
2045 val /= COEF;
2046 *rho = val*2*val;
2047
2048 if(grad) {
2049 gradx *= 2*val;
2050 grady *= 2*val;
2051 gradz *= 2*val;
2052 /* printf("grad: (%g,%g,%g)\n", gradx, grady, gradz); */
2053 *grad = sqrt(gradx*gradx + grady*grady + gradz*gradz);
2054 }
2055 }
2056
2057 static real
norm3(real x,real y,real z)2058 norm3(real x, real y, real z)
2059 {
2060 const static real ALPHA=0.62341234;
2061 real r2 = x*x + y*y+ z*z;
2062 real norm= exp(-2*ALPHA*r2);
2063 return norm;
2064 }
2065
2066 static real
rho(real x,real y,real z)2067 rho(real x, real y, real z)
2068 {
2069 real rho;
2070 get_rho_grad(x,y,z, &rho, NULL);
2071 return rho;
2072 }
2073
2074
2075 /* dirac3:
2076 single atom: -.76151426465514650000
2077 */
2078 static real
dirac3(real x,real y,real z)2079 dirac3(real x, real y, real z)
2080 {
2081 const real PREF= -3.0/4.0*pow(6/M_PI, 1.0/3.0);
2082 real rhoa;
2083 get_rho_grad(x,y,z, &rhoa, NULL);
2084 rhoa *= 0.5;
2085 return PREF*2*(pow(rhoa,4.0/3.0));
2086 }
2087
2088 static real
becke3(real x,real y,real z)2089 becke3(real x, real y, real z)
2090 {
2091 real rhoa, grada;
2092 real xa,asha, ea;
2093 const static real BETA=0.0042;
2094
2095 get_rho_grad(x,y,z, &rhoa, &grada);
2096 rhoa *= 0.5; grada *=0.5;
2097 if(rhoa<1e-10) return 0;
2098
2099 xa = grada*pow(rhoa,-1.0/3.0)/rhoa;
2100 asha = asinh(xa);
2101 ea = -2*grada*xa*BETA/(1+6*xa*BETA*asha);
2102 return ea;
2103 }
2104
2105 static real
lyp3(real x,real y,real z)2106 lyp3(real x, real y, real z)
2107 {
2108 const real A = 0.04918, B = 0.132, C = 0.2533, D = 0.349;
2109 const real CF = 0.3*pow(3*M_PI*M_PI,2.0/3.0);
2110
2111 real rho, rhom13, denom, omega, delta, ret, ngrad2, ngrada2, ngradb2;
2112 real rho2, t1, t2, t3, t4, t5, t6;
2113 real rhoa = 0.0, rhob = 0.0, grada = 0.0, gradb = 0.0;
2114
2115 get_rho_grad(x,y,z, &rhoa, &grada);
2116 if(rhoa<1e-10) return 0;
2117 rhob=rhoa;
2118 gradb=grada;
2119
2120 rho = rhoa+rhob;
2121 rho2 = rho*rho;
2122 ngrad2 = (grada+gradb)*(grada+gradb);
2123 ngrada2 = grada*grada;
2124 ngradb2 = gradb*gradb;
2125 rhom13 = pow(rho,-1.0/3.0);
2126 denom = 1+D*rhom13;
2127 omega = exp(-C*rhom13)/denom*pow(rho,-11.0/3.0);
2128 delta = rhom13*(C + D/denom);
2129 t1 = pow(2.0,11.0/3.0)*CF*(pow(rhoa,8.0/3.0) +pow(rhob,8.0/3.0));
2130 t2 = (47.0 - 7.0*delta)*ngrad2/18.0;
2131 t3 = -(2.5 -delta/18.0)*(ngrada2+ngradb2);
2132 t4 = (11.0-delta)/9.0*(rhoa*ngrada2 + rhob*ngradb2)/rho;
2133 t5 = -2.0/3.0*rho2*ngrad2;
2134 t6 = ((2.0/3.0*rho2-rhoa*rhoa)*ngradb2 +
2135 (2.0/3.0*rho2-rhob*rhob)*ngrada2);
2136 ret = -A*(4*rhoa*rhob/(denom*rho)
2137 +B*omega*(rhoa*rhob*(t1+t2+t3+t4)+t5+t6));
2138 return ret;
2139 }
2140
2141 /* ------------------------------------------------------------------- */
2142 /* main test driver */
2143 /* ------------------------------------------------------------------- */
2144 integer
main(integer argc,char * argv[])2145 main(integer argc, char* argv[])
2146 {
2147 integer i, arg, no;
2148 real THRESHOLD = 1e-3;
2149 const static struct ff {
2150 GGFunc func;
2151 char * name;
2152 } functions_to_try[] = {
2153 /*{ norm3, "Norm" }, */
2154 { dirac3,"Dirac " },
2155 { becke3,"Becke " },
2156 { rho, "charge" }
2157 /* { lyp3, "LYP" } */
2158 };
2159 include_selected_partitioning = include_partitioning_becke1;
2160
2161 if(argc<=1) {
2162 fprintf(stderr, "No arguments specified.\n"
2163 "\t-v : increase verbosity level.\n"
2164 "\t-t THR: specify new threshold level.\n"
2165 "\t-b : use Becke partitioning (default).\n"
2166 "\t-s : use SSF partitioning.\n"
2167 "\tnumber: molecule to run.\n");
2168 return 1;
2169 }
2170 for(arg=1; arg<argc; arg++) {
2171 if(strcmp(argv[arg], "-v")==0)
2172 { verbose++; puts("Verbose++"); }
2173 else if(strcmp(argv[arg], "-t")==0) {
2174 THRESHOLD = atof(argv[++arg]);
2175 printf("Threshold set to: %g\n", THRESHOLD);
2176 } else if(strcmp(argv[arg], "-b")==0) {
2177 include_selected_partitioning = include_partitioning_becke1;
2178 puts("Becke partitioning");
2179 } else if(strcmp(argv[arg], "-s")==0) {
2180 include_selected_partitioning = include_partitioning_ssf;
2181 puts("SSF partitioning");
2182 } else if( (no=atoi(argv[arg]))>0 &&
2183 no<= ELEMENTS(test_molecules)) {
2184 GridGenMolGrid* mgrid;
2185 printf("Testing molecule: %d\n", no);
2186 no--;
2187 test_molecule = &test_molecules[no];
2188 mgrid =
2189 mol_grid_new(test_molecule->atom_cnt, test_molecule->atoms,
2190 THRESHOLD);
2191
2192 for(i=0; i< ELEMENTS(functions_to_try); i++) {
2193 if(verbose)
2194 printf("%5s: Number of radial grid points: %d\n",
2195 functions_to_try[i].name,
2196 mgrid->atom_grids[0]->pnt);
2197 set_radial_grid(mgrid);
2198 set_ang(mgrid, THRESHOLD, functions_to_try[i].func);
2199 /* integration */
2200 printf("%5s: Integrated function : %20.15f\n",
2201 functions_to_try[i].name,
2202 integrate(mgrid, functions_to_try[i].func));
2203 }
2204 mol_grid_free(mgrid);
2205 }
2206 }
2207 return 0;
2208 }
2209 #endif /* defined(GRID_GEN_TEST) && GRID_GEN_TEST == 1 */
2210
2211
2212 /*Replacement for strdup() on certain systems*/
StringDuplicate(const char * s1)2213 char *StringDuplicate(const char *s1)
2214 {
2215 #if defined(SYS_DARWIN)
2216 size_t len = strlen (s1) + 1;
2217 char *res = malloc (len);
2218 if (res)
2219 memcpy (res, s1, len);
2220 return res;
2221 #else
2222 return strdup(s1);
2223 #endif
2224 }
2225
2226