1 /*
2
3
4 !
5 ! Dalton, a molecular electronic structure program
6 ! Copyright (C) by the authors of Dalton.
7 !
8 ! This program is free software; you can redistribute it and/or
9 ! modify it under the terms of the GNU Lesser General Public
10 ! License version 2.1 as published by the Free Software Foundation.
11 !
12 ! This program is distributed in the hope that it will be useful,
13 ! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ! Lesser General Public License for more details.
16 !
17 ! If a copy of the GNU LGPL v2.1 was not distributed with this
18 ! code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html.
19 !
20
21 !
22
23 */
24 /* The multi-scale cartesian cubature grid generator. The original
25 * implementation described in M. Challacombe, JCP 113(22),
26 * p.10037. This one is modified wrt to the paper.
27 *
28 * Elias Rudberg, 2004-04
29 */
30
31 #define __CVERSION__
32 #if !defined(SYS_DEC)
33 /* XOPEN compliance is missing on old Tru64 4.0E Alphas and pow() prototype
34 * is not specified. */
35 #define _XOPEN_SOURCE 500
36 #define _XOPEN_SOURCE_EXTENDED 1
37 #endif
38
39 #include <stdlib.h>
40 #include <math.h>
41 #include <stdio.h>
42 #include <errno.h>
43 #include <memory.h>
44 #include <time.h>
45 #include <stdarg.h>
46 #include "general.h"
47 #include "grid-gen.h"
48 #include "basisinfo.h"
49
50 /* #define USE_PTHREADS */
51 #ifdef USE_PTHREADS
52 #include <pthread.h>
53 #endif
54
55 static const integer CUBATURE_RULE = 3;
56 static const integer CUBATURE_RULE_2 = 6;
57 static const integer NO_OF_DIMENSIONS = 3; /* 1, 2 or 3 */
58 static const real CONSTPI = M_PI;
59
60
61 /* Whether hierarchical cubature should be used or not. Modified by
62 * a keyword in the input file. */
63 int global_useHiCu = 0;
64
65 static integer global_maxNoOfShells = 44444;
66
67 /* This variable is used to keep track of when it is time to
68 create a new grid, together with parameter global_nFreeze. */
69 static integer global_gridCount = 0;
70
71
72 /* ---------- CONFIGURABLE PARAMETERS ------------------------------
73 ---------- CONFIGURABLE PARAMETERS ------------------------------
74 ---------- CONFIGURABLE PARAMETERS ------------------------------ */
75 /* number of threads */
76 static integer global_nThreads = 1;
77
78 /* flag for test integration. If turned on, the grid file is
79 reopened after it has been created, and the density is integrated
80 using the newly created grid. Just to check that it gives the
81 same result as reported by the grid generation and by
82 the dft integrator. */
83 static integer global_doTestIntegration = 0;
84
85 /* Output level. 0 means minimum output, 1 means little output,
86 2 means a lot of output. */
87 static integer global_outputLevel = 1;
88
89 /* Number of iterations to use the same grid, before creating
90 a new one. nFreeze=1 gives a new grid for each iteration.
91 nFreeze=1000 means that the first grid is used throughout
92 the whole calculation. */
93 static integer global_nFreeze = 1000;
94
95 /* Threshold value for distributions. A gaussian is ignored in areas
96 where its value is below this threshold. A low value is
97 computationally expensive. */
98 static real global_targetRhoError = 1.0e-10;
99
100 /* Cutoff value used to decide which gaussian products should
101 be ignored. A product with a lower coefficient than this
102 will be thrown away. */
103 static real global_distrCutoff = 1.0e-11;
104
105 /* 3d space is partitioned into boxes of this size. */
106 static real global_boxdist = 1.5;
107
108 /* Main threshold error value for grid generation. The difference of
109 analytical and numerical integrals is forced below this value.
110 This is the most important parameter, and probably the only one
111 that a typical user should worry about. */
112 static real global_maxerror = 1.0e-7;
113
114
115
116
117
118
119 #define USE_EXP_STD
120 #define USE_ERF_STD
121 #define DO_EXTRA_ERROR_CHECKING
122 #define FILE_BATCH_N 200000
123 #define MAX_NO_OF_POINTS_PER_BATCH 100
124 #define MAX_NO_OF_SHLBLOCKS 44444
125 #define EXPONENT_DIFF_LIMIT 1e-22
126 #define DISTR_CENTER_DIST_LIMIT 1e-22
127 #define N_BATCH_JOBS 22
128 #define MAX_NO_OF_POINTS_PER_WRITE 50000
129
130 #if !defined(INSTALL_WRKMEM)
131 #define USE_TEST_VERSION
132 #endif
133
134 /*#define USE_TEST_VERSION */
135
136 /*#define SKIP_THREADS */
137
138
139 #ifdef USE_PTHREADS
140 pthread_mutex_t globalOutputMutex = PTHREAD_MUTEX_INITIALIZER;
141 #endif
142
143
144 /*//////////////////////////////////////////////////////////////////////// */
145 /*///////////////// typedef section //////////////////////////////////// */
146 /*//////////////////////////////////////////////////////////////////////// */
147
148 #if 0
149 struct DistributionSpecContr_{
150 integer noOfContr;
151 real coeffList[MAX_NO_OF_CONTR_GAUSSIANS];
152 real exponentList[MAX_NO_OF_CONTR_GAUSSIANS];
153 real centerCoords[3]; /* x0, y0, z0 */
154 integer monomialInts[3]; /* nx, ny, nz */
155 };
156 typedef struct DistributionSpecContr_ DistributionSpecContr;
157 #endif
158
159 typedef struct
160 {
161 integer noOfShells;
162 ShellSpecStruct* shellList;
163 integer nbast;
164 const real* dmat;
165 BasisFuncStruct* basisFuncList;
166 integer noOfDistributions;
167 DistributionSpecStruct* distrList;
168 } DensitySpecStruct;
169
170
171
172 struct BoxStruct_{
173 real min[3]; /* xmin, ymin, zmin */
174 real max[3]; /* xmax, ymax, zmax */
175 };
176 typedef struct BoxStruct_ BoxStruct;
177
178 struct rhoTreeNode_{
179 BoxStruct box;
180 struct rhoTreeNode_* child1; /* NULL for leaf node */
181 struct rhoTreeNode_* child2; /* NULL for leaf node */
182 integer distrIndex; /* -1 for non-leaf node */
183 };
184 typedef struct rhoTreeNode_ rhoTreeNode;
185
186 typedef struct
187 {
188 DensitySpecStruct density;
189 integer noOfNonzeroDistributions;
190 integer* nonZeroDistrIndexList;
191 integer noOfNonzeroShells;
192 integer* nonZeroShellsIndexList;
193 real maxerrorPerBox;
194 } compute_grid_for_box_params_struct;
195
196 typedef struct
197 {
198 DensitySpecStruct* density;
199 rhoTreeNode* rhoTreeRootNode;
200 rhoTreeNode* rhoTreeRootNodeShells;
201 real maxerror;
202 FILE* gridFile;
203 BoxStruct* startBox;
204 integer Nx;
205 integer Ny;
206 integer Nz;
207 #ifdef USE_PTHREADS
208 pthread_mutex_t* fileMutex;
209 pthread_mutex_t* jobMutex;
210 pthread_t thread;
211 #endif
212 integer* currJobNumber;
213 integer noOfPoints; /* OUTPUT */
214 integer noOfWrittenBatches; /* OUTPUT */
215 real integralResult; /* OUTPUT */
216 integer threadNo;
217 } compute_grid_thread_func_struct;
218
219
220 /*//////////////////////////////////////////////////////////////////////// */
221 /*///////////////// end of typedef section ///////////////////////////// */
222 /*//////////////////////////////////////////////////////////////////////// */
223
224
225 /* Solid harmonics based on the table 6.3 of Molecular
226 * Electronic-Structure Theory by Helgaker, Jørgensen and Olsen. */
227
228 #define solid_harmonic_s_0(x, y, z, x2, y2, z2, r2) 1
229
230 #define solid_harmonic_p_0(x, y, z, x2, y2, z2, r2) x
231 #define solid_harmonic_p_1(x, y, z, x2, y2, z2, r2) y
232 #define solid_harmonic_p_2(x, y, z, x2, y2, z2, r2) z
233
234 #define solid_harmonic_d_0(x, y, z, x2, y2, z2, r2) (x * y)
235 #define solid_harmonic_d_1(x, y, z, x2, y2, z2, r2) (y * z)
236 #define solid_harmonic_d_2(x, y, z, x2, y2, z2, r2) ((2 * z2 - x2 - y2) / (2 * sqrt(3)))
237 #define solid_harmonic_d_3(x, y, z, x2, y2, z2, r2) (x * z)
238 #define solid_harmonic_d_4(x, y, z, x2, y2, z2, r2) (0.5 * (x2 - y2))
239
240 #define solid_harmonic_f_0(x, y, z, x2, y2, z2, r2) ((0.5 * sqrt(2.5) * (3 * x2 - y2) * y) / sqrt(15))
241 #define solid_harmonic_f_1(x, y, z, x2, y2, z2, r2) (x * y * z)
242 #define solid_harmonic_f_2(x, y, z, x2, y2, z2, r2) (0.5 * sqrt(1.5) * (5 * z2 - r2) * y / sqrt(15))
243 #define solid_harmonic_f_3(x, y, z, x2, y2, z2, r2) (0.5 * (5 * z2 - 3 * r2) * z / sqrt(15))
244 #define solid_harmonic_f_4(x, y, z, x2, y2, z2, r2) (0.5 * sqrt(1.5) * (5 * z2 - r2) * x / sqrt(15))
245 #define solid_harmonic_f_5(x, y, z, x2, y2, z2, r2) (0.5 * (x2 - y2) * z)
246 #define solid_harmonic_f_6(x, y, z, x2, y2, z2, r2) (0.5 * sqrt(2.5) * (x2 - 3 * y2) * x / sqrt(15))
247
248
249
250 void
do_output_2(integer prio,const char * format,...)251 do_output_2(integer prio, const char* format, ...)
252 {
253 char s[888];
254 va_list a;
255
256 if(prio > global_outputLevel)
257 return;
258
259 #ifdef USE_PTHREADS
260 pthread_mutex_lock(&globalOutputMutex);
261 #endif
262
263 va_start(a, format);
264 vsnprintf(s, sizeof(s), format, a);
265 va_end(a);
266
267 #if defined USE_TEST_VERSION
268 printf("USE_TEST_VERSION defined\n");
269 printf("%s\n", s);
270 #else
271 fort_print(s);
272 #endif
273
274 #ifdef USE_PTHREADS
275 pthread_mutex_unlock(&globalOutputMutex);
276 #endif
277 }
278
279
280
281 static void
do_error_exit(const char * s)282 do_error_exit(const char* s)
283 {
284 char ss[888];
285 sprintf(ss, "error_exit: %s\n", s);
286 printf("%s\n",ss);
287 do_output_2(0, ss);
288 fprintf(stderr,"%s\n", ss);
289 exit(1);
290 }
291
292 void*
dal_malloc_safe_(size_t sz,const char * place,integer line)293 dal_malloc_safe_(size_t sz, const char *place, integer line)
294 {
295 void* res = malloc(sz);
296 if(!res) {
297 do_output_2(0, "error in dal_malloc_safe, '%s', sz = %i, line %i\n",
298 place, sz, line);
299 do_error_exit("error in dal_malloc_safe_");
300 }
301 return res;
302 }
303
304 void
dal_free(void * a)305 dal_free(void *a)
306 {
307 free(a);
308 }
309
310 #define dal_malloc_safe(sz) dal_malloc_safe_((sz),__FUNCTION__, __LINE__)
311
312
313
314
315
316 #if 0
317 void
318 dftgridparams_(char *line, integer len)
319 {
320 integer i = 0, tokenlen;
321 char *sp;
322 char* endPtr;
323 printf("dftgridparams_ len = %i\n", len);
324 printf("parsing '%s'\n", line);
325 endPtr = line + len;
326 for(i=0; i<len; i+=tokenlen) {
327 sp = strchr(line+i, ' ');
328 /*printf("parsing '%s'\n", line+i); */
329 if(sp)
330 tokenlen = sp-(line+i)+1;
331 else tokenlen=100000;
332 /*printf("tokenlen = %i\n", tokenlen); */
333 if(strncmp(line+i, "box=", 4) ==0) {
334 global_boxdist = atof(line+i+4);
335 printf("New box distance %f\n", global_boxdist);
336 continue;
337 }
338 if(strncmp(line+i, "maxerror=", 9) ==0) {
339 global_maxerror = atof(line+i+9);
340 printf("New maxerror %g\n", global_maxerror);
341 continue;
342 }
343 if(strncmp(line+i, "nthreads=", 9) ==0) {
344 global_nThreads = atoi(line+i+9);
345 printf("New global_nThreads %i\n", global_nThreads);
346 continue;
347 }
348 if(strncmp(line+i, "dotest=", 7) ==0) {
349 global_doTestIntegration = atoi(line+i+7);
350 printf("New global_doTestIntegration %i\n",
351 global_doTestIntegration);
352 continue;
353 }
354 if(strncmp(line+i, "usehicu=", 8) ==0) {
355 global_useHiCu = atoi(line+i+8);
356 printf("New global_useHiCu %i\n",
357 global_useHiCu);
358 continue;
359 }
360 if(strncmp(line+i, "nfreeze=", 8) ==0) {
361 global_nFreeze = atoi(line+i+8);
362 printf("New global_nFreeze %i\n",
363 global_nFreeze);
364 continue;
365 }
366 if(strncmp(line+i, "output=", 7) ==0) {
367 global_outputLevel = atoi(line+i+7);
368 printf("New global_outputLevel %i\n", global_outputLevel);
369 continue;
370 }
371 } /* END FOR i */
372 }
373 #endif
374
375
make_float_string(char * s,real x)376 static void make_float_string(char* s, real x)
377 {
378 real temp;
379 integer power;
380 power = 0;
381 temp = x;
382 while(fabs(temp) > 9.999)
383 {
384 temp /= 10;
385 power++;
386 }
387 while(fabs(temp) < 0.999)
388 {
389 temp *= 10;
390 power--;
391 }
392 sprintf(s, "%.1fe%i", (double)temp, power);
393 }
394
395
396
397
398 static integer
parseParam(char * s)399 parseParam(char* s)
400 {
401 /* use #define instead of more modern static const integer to please old
402 * compilers. */
403 #define MAX_BYTES 222
404 char* p = s;
405 char* endPtr = s + strlen(s);
406 char paramName[MAX_BYTES];
407 char paramValueString[MAX_BYTES];
408 /* look for = */
409 char* q = p;
410
411 while((*q != '=') && (*q != 0))
412 q++;
413 if(*q != '=')
414 {
415 do_output_2(0, "error parsing string '%s': '=' not found", s);
416 return -1;
417 }
418 /* now q points to '=' */
419 memcpy(paramName, p, q-p);
420 paramName[q-p] = 0;
421 /*printf("paramName = '%s'\n", paramName); */
422 memcpy(paramValueString, q+1, endPtr-q-1);
423 paramValueString[endPtr-q-1] = 0;
424 /*printf("paramValueString = '%s'\n", paramValueString); */
425 if(strlen(paramName) == 0)
426 {
427 do_output_2(0, "error parsing string '%s': nothing found before '='", s);
428 return -1;
429 }
430 if(strlen(paramValueString) == 0)
431 {
432 do_output_2(0, "error parsing string '%s': nothing found after '='", s);
433 return -1;
434 }
435
436 if(strcmp(paramName, "box") == 0)
437 {
438 real newBoxDist = atof(paramValueString);
439 if(newBoxDist <= 0)
440 {
441 do_output_2(0, "error: grid param box = %f", newBoxDist);
442 return -1;
443 }
444 global_boxdist = newBoxDist;
445 do_output_2(2, "grid parameter box = %f", global_boxdist);
446 return 0;
447 }
448 if(strcmp(paramName, "maxerror") == 0)
449 {
450 real new_maxerror;
451 char ss[MAX_BYTES];
452 new_maxerror = atof(paramValueString);
453 if(new_maxerror <= 0)
454 {
455 do_output_2(0, "error: grid param maxerror = %f", new_maxerror);
456 return -1;
457 }
458 global_maxerror = new_maxerror;
459 make_float_string(ss, global_maxerror);
460 do_output_2(2, "grid parameter maxerror = %s", ss);
461 return 0;
462 }
463 if(strcmp(paramName, "output") == 0)
464 {
465 global_outputLevel = atoi(paramValueString);
466 do_output_2(2, "grid parameter output = %i", global_outputLevel);
467 return 0;
468 }
469 if(strcmp(paramName, "nfreeze") == 0)
470 {
471 integer new_nFreeze = atoi(paramValueString);
472 if(new_nFreeze <= 0)
473 {
474 do_output_2(0, "error: grid param nfreeze = %f", new_nFreeze);
475 return -1;
476 }
477 global_nFreeze = new_nFreeze;
478 do_output_2(2, "grid parameter nfreeze = %i", global_nFreeze);
479 return 0;
480 }
481 if(strcmp(paramName, "nthreads") == 0)
482 {
483 integer new_nThreads = atoi(paramValueString);
484 if(new_nThreads <= 0)
485 {
486 do_output_2(0, "error: grid param nthreads = %f", new_nThreads);
487 return -1;
488 }
489 global_nThreads = new_nThreads;
490 do_output_2(2, "grid parameter nthreads = %i", global_nThreads);
491 return 0;
492 }
493 if(strcmp(paramName, "dotest") == 0)
494 {
495 global_doTestIntegration = atoi(paramValueString);
496 do_output_2(2, "grid parameter dotest = %i\n",
497 global_doTestIntegration);
498 return 0;
499 }
500 do_output_2(0, "error in grid input: unknown parameter '%s'", paramName);
501 return -1;
502 }
503
504 /* dftcartesianinput: called from Fortran code to allow setting different
505 * parameters of the cubature code. */
506 void
dftcartesianinput_(const char * line,integer line_len)507 dftcartesianinput_(const char *line, integer line_len)
508 {
509 #define MAX_BYTES 222
510 integer inperr;
511 integer* inperrPtr = &inperr;
512 char *endPtr, *p;
513 char line2[MAX_BYTES];
514
515 do_output_2(1, "dftcartesianinput, line_len = %i", line_len);
516 if(line_len < 0)
517 return;
518 memset(line2, 0, MAX_BYTES);
519 memcpy(line2, line, line_len);
520 line2[line_len] = 0;
521 /*printf("dftgridparams_ len = %i\n", line_len); */
522 /*printf("line after cutting at line_len:\n'%s'\n", line2); */
523 endPtr = line2 + line_len;
524 p = line2;
525 while(p < endPtr)
526 {
527 char paramBuf[MAX_BYTES];
528 char* q;
529 /* skip spaces */
530 while(*p == ' ')
531 p++;
532 if(*p == 0)
533 break;
534 /* now we are at the beginning of some string */
535 q = p;
536 while((*q != ' ') && (*q != 0))
537 q++;
538 memcpy(paramBuf, p, q-p);
539 paramBuf[q-p] = 0;
540 if(parseParam(paramBuf) != 0)
541 *inperrPtr++;
542 p = q;
543 }
544 }
545
546
547 static void
print_box(BoxStruct * box,integer prio)548 print_box(BoxStruct* box, integer prio)
549 {
550 integer i;
551 do_output_2(prio, "print_box:");
552 for(i = 0; i < NO_OF_DIMENSIONS; i++)
553 {
554 do_output_2(prio, "min = %.11f max = %.11f",
555 (double)box->min[i], (double)box->max[i]);
556 } /* END FOR i */
557 }
558
559
560 static integer
get_distribution_box(BoxStruct * box,DistributionSpecStruct * distr,real targetRhoError)561 get_distribution_box(BoxStruct* box,
562 DistributionSpecStruct* distr,
563 real targetRhoError)
564 {
565 real targetError, r1, extent, arg;
566 integer i;
567 targetError = targetRhoError;
568 arg = distr->coeff / targetError;
569 if(arg < 0) arg *= -1;
570 if(arg < 1e-30)
571 {
572 do_output_2(0, "error in get_distribution_box: (arg == 0)");
573 return -1;
574 }
575 r1 = log(arg);
576 if(r1 < 0) r1 *= -1;
577 extent = sqrt(r1 / distr->exponent);
578 for(i = 0; i < NO_OF_DIMENSIONS; i++)
579 {
580 box->min[i] = distr->centerCoords[i] - extent;
581 box->max[i] = distr->centerCoords[i] + extent;
582 } /* END FOR i */
583 return 0;
584 } /* END get_distribution_box */
585
586 static integer
get_shell_box(BoxStruct * box,ShellSpecStruct * shell)587 get_shell_box(BoxStruct* box, ShellSpecStruct* shell)
588 {
589 integer i;
590 for(i = 0; i < NO_OF_DIMENSIONS; i++)
591 {
592 box->min[i] = shell->centerCoords[i] - shell->extent;
593 box->max[i] = shell->centerCoords[i] + shell->extent;
594 } /* END FOR i */
595 return 0;
596 } /* END get_shell_box */
597
598
599
600
601 static real
compute_value_at_point(DensitySpecStruct * density,integer noOfNonzeroShells,integer * nonZeroShellsIndexList,integer noOfNonzeroBasFuncs,integer * nonZeroBasFuncsIndexList,real (* coor)[3],real * workList)602 compute_value_at_point(
603 DensitySpecStruct* density,
604 integer noOfNonzeroShells,
605 integer* nonZeroShellsIndexList,
606 integer noOfNonzeroBasFuncs,
607 integer* nonZeroBasFuncsIndexList,
608 real (*coor)[3],
609 real* workList)
610 {
611 ShellSpecStruct* currShell;
612 integer i, j, iIndex, jIndex, symmetryFactor, count;
613 real expFactor, result, currivalue;
614 real xdiff, ydiff, zdiff;
615 real x0, y0, z0;
616 real x2, y2, z2, r2;
617 integer nbast;
618 const real* dmat;
619
620 nbast = density->nbast;
621 dmat = density->dmat;
622
623 if(noOfNonzeroBasFuncs > nbast)
624 {
625 do_error_exit("error in compute_integral_from_points: "
626 "(noOfNonzeroBasFuncs > nbast)\n");
627 return 0;
628 }
629
630 /* compute values of contracted distributions at given point */
631 count = 0;
632 for(i = 0; i < noOfNonzeroShells; i++)
633 {
634 currShell = &density->shellList[nonZeroShellsIndexList[i]];
635 x0 = currShell->centerCoords[0];
636 y0 = currShell->centerCoords[1];
637 z0 = currShell->centerCoords[2];
638
639 xdiff = coor[0][0] - x0;
640 ydiff = coor[0][1] - y0;
641 zdiff = coor[0][2] - z0;
642 x2 = xdiff * xdiff;
643 y2 = ydiff * ydiff;
644 z2 = zdiff * zdiff;
645 r2 = x2 + y2 + z2;
646
647 /* compute expFactor (this is the same procedure for all shell types) */
648 expFactor = 0;
649 for(j = 0; j < currShell->noOfContr; j++)
650 expFactor += currShell->coeffList[j] *
651 exp(-currShell->exponentList[j] * r2);
652 /* OK, expFactor computed */
653
654 /* now there will be a different number of entries */
655 /* depending on shell type */
656 switch(currShell->shellType)
657 {
658 case 0:
659 /* 's' type shell, 1 function */
660 workList[count] = expFactor *
661 solid_harmonic_s_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
662 break;
663 case 1:
664 /* 'p' type shell, 3 functions */
665 workList[count] = expFactor *
666 solid_harmonic_p_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
667 workList[count] = expFactor *
668 solid_harmonic_p_1(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
669 workList[count] = expFactor *
670 solid_harmonic_p_2(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
671 break;
672 case 2:
673 /* 'd' type shell, 5 functions */
674 workList[count] = expFactor *
675 solid_harmonic_d_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
676 workList[count] = expFactor *
677 solid_harmonic_d_1(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
678 workList[count] = expFactor *
679 solid_harmonic_d_2(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
680 workList[count] = expFactor *
681 solid_harmonic_d_3(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
682 workList[count] = expFactor *
683 solid_harmonic_d_4(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
684 break;
685 case 3:
686 /* 'f' type shell, 7 functions */
687 workList[count] = expFactor *
688 solid_harmonic_f_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
689 workList[count] = expFactor *
690 solid_harmonic_f_1(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
691 workList[count] = expFactor *
692 solid_harmonic_f_2(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
693 workList[count] = expFactor *
694 solid_harmonic_f_3(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
695 workList[count] = expFactor *
696 solid_harmonic_f_4(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
697 workList[count] = expFactor *
698 solid_harmonic_f_5(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
699 workList[count] = expFactor *
700 solid_harmonic_f_6(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
701 break;
702 default:
703 do_output_2(0, "error in compute_value_at_point_2: "
704 "only spdf type shells implemented");
705 do_output_2(0, "currShell->shellType = %i\n", currShell->shellType);
706 do_error_exit("error in compute_value_at_point_2");
707 return -1;
708 } /* END SWITCH shellType */
709 } /* END FOR i (for each shell) */
710
711 if(count > nbast)
712 {
713 do_error_exit("error in compute_value_at_point: (count > nbast)");
714 return -1;
715 }
716
717 /* now use density matrix to obtain final result */
718 result = 0;
719 for(i = 0; i < noOfNonzeroBasFuncs; i++)
720 {
721 currivalue = workList[i];
722 iIndex = nonZeroBasFuncsIndexList[i];
723 for(j = i; j < noOfNonzeroBasFuncs; j++)
724 {
725 if(j == i)
726 symmetryFactor = 1;
727 else
728 symmetryFactor = 2;
729 jIndex = nonZeroBasFuncsIndexList[j];
730 result += symmetryFactor * dmat[iIndex*nbast+jIndex] *
731 currivalue * workList[j];
732 } /* END FOR j */
733 } /* END FOR i */
734
735 return result;
736 } /* END compute_value_at_point */
737
738
739
740 static real
compute_integral_from_points(DensitySpecStruct * density,integer noOfNonzeroShells,integer * nonZeroShellsIndexList,integer noOfNonzeroBasFuncs,integer * nonZeroBasFuncsIndexList,integer nPoints,real (* coor)[3],real * weight,real * workList)741 compute_integral_from_points(
742 DensitySpecStruct* density,
743 integer noOfNonzeroShells,
744 integer* nonZeroShellsIndexList,
745 integer noOfNonzeroBasFuncs,
746 integer* nonZeroBasFuncsIndexList,
747 integer nPoints,
748 real (*coor)[3],
749 real* weight,
750 real* workList)
751 {
752 #if 1
753 integer i;
754 real sum;
755 sum = 0;
756 for(i = 0; i < nPoints; i++)
757 {
758 sum += compute_value_at_point(density,
759 noOfNonzeroShells,
760 nonZeroShellsIndexList,
761 noOfNonzeroBasFuncs,
762 nonZeroBasFuncsIndexList,
763 &coor[i],
764 workList) * weight[i];
765 } /* END FOR i */
766 return sum;
767 #else
768 ShellSpecStruct* currShell;
769 integer i, j, iIndex, jIndex, symmetryFactor, count, pointNo;
770 real expFactor, result;
771 real xdiff, ydiff, zdiff;
772 real x2, y2, z2, r2;
773 real x0, y0, z0;
774 real sum;
775 integer savedCount;
776 integer nbast;
777 const real* dmat;
778
779 nbast = density->nbast;
780 dmat = density->dmat;
781
782 if(noOfNonzeroBasFuncs > nbast)
783 {
784 do_error_exit("error in compute_integral_from_points: "
785 "(noOfNonzeroBasFuncs > nbast)\n");
786 return 0;
787 }
788
789 if(nPoints > MAX_NO_OF_POINTS_PER_BATCH)
790 {
791 do_error_exit("error in compute_integral_from_points: "
792 "(nPoints > MAX_NO_OF_POINTS_PER_BATCH)\n");
793 return 0;
794 }
795
796 memset(workList, 0x00 ,noOfNonzeroBasFuncs * sizeof(real));
797
798 /* compute values of contracted distributions at given point */
799 count = 0;
800
801 for(i = 0; i < noOfNonzeroShells; i++)
802 {
803 currShell = &density->shellList[nonZeroShellsIndexList[i]];
804 x0 = currShell->centerCoords[0];
805 y0 = currShell->centerCoords[1];
806 z0 = currShell->centerCoords[2];
807
808 savedCount = count;;
809 for(pointNo = 0; pointNo < nPoints; pointNo++)
810 {
811 count = savedCount;
812
813 xdiff = coor[pointNo][0] - x0;
814 ydiff = coor[pointNo][1] - y0;
815 zdiff = coor[pointNo][2] - z0;
816 x2 = xdiff * xdiff;
817 y2 = ydiff * ydiff;
818 z2 = zdiff * zdiff;
819 r2 = x2 + y2 + z2;
820
821 /* compute expFactor (this is the same procedure for all
822 * shell types) */
823 expFactor = 0;
824 for(j = 0; j < currShell->noOfContr; j++)
825 expFactor += currShell->coeffList[j] *
826 exp(-currShell->exponentList[j] * r2);
827 /* OK, expFactor computed */
828 /*printf("expFactor = %.22f\n", expFactor); */
829
830 /* now there will be a different number of entries */
831 /* depending on shell type */
832 switch(currShell->shellType)
833 {
834 case 0:
835 /* 's' type shell, 1 function */
836 workList[count*nPoints+pointNo] = expFactor *
837 solid_harmonic_s_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
838 count++;
839 break;
840 case 1:
841 /* 'p' type shell, 3 functions */
842 workList[count*nPoints+pointNo] = expFactor *
843 solid_harmonic_p_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
844 count++;
845 workList[count*nPoints+pointNo] = expFactor *
846 solid_harmonic_p_1(xdiff, ydiff, zdiff, x2, y2, z2, r2);
847 count++;
848 workList[count*nPoints+pointNo] = expFactor *
849 solid_harmonic_p_2(xdiff, ydiff, zdiff, x2, y2, z2, r2);
850 count++;
851 break;
852 case 2:
853 /* 'd' type shell, 5 functions */
854 workList[count*nPoints+pointNo] = expFactor *
855 solid_harmonic_d_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
856 count++;
857 workList[count*nPoints+pointNo] = expFactor *
858 solid_harmonic_d_1(xdiff, ydiff, zdiff, x2, y2, z2, r2);
859 count++;
860 workList[count*nPoints+pointNo] = expFactor *
861 solid_harmonic_d_2(xdiff, ydiff, zdiff, x2, y2, z2, r2);
862 count++;
863 workList[count*nPoints+pointNo] = expFactor *
864 solid_harmonic_d_3(xdiff, ydiff, zdiff, x2, y2, z2, r2);
865 count++;
866 workList[count*nPoints+pointNo] = expFactor *
867 solid_harmonic_d_4(xdiff, ydiff, zdiff, x2, y2, z2, r2);
868 count++;
869 break;
870 case 3:
871 /* 'f' type shell, 7 functions */
872 workList[count*nPoints+pointNo] = expFactor *
873 solid_harmonic_f_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
874 count++;
875 workList[count*nPoints+pointNo] = expFactor *
876 solid_harmonic_f_1(xdiff, ydiff, zdiff, x2, y2, z2, r2);
877 count++;
878 workList[count*nPoints+pointNo] = expFactor *
879 solid_harmonic_f_2(xdiff, ydiff, zdiff, x2, y2, z2, r2);
880 count++;
881 workList[count*nPoints+pointNo] = expFactor *
882 solid_harmonic_f_3(xdiff, ydiff, zdiff, x2, y2, z2, r2);
883 count++;
884 workList[count*nPoints+pointNo] = expFactor *
885 solid_harmonic_f_4(xdiff, ydiff, zdiff, x2, y2, z2, r2);
886 count++;
887 workList[count*nPoints+pointNo] = expFactor *
888 solid_harmonic_f_5(xdiff, ydiff, zdiff, x2, y2, z2, r2);
889 count++;
890 workList[count*nPoints+pointNo] = expFactor *
891 solid_harmonic_f_6(xdiff, ydiff, zdiff, x2, y2, z2, r2);
892 count++;
893 break;
894 default:
895 do_output_2(0, "error in compute_integral_from_points: "
896 "only spdf type shells implemented");
897 do_output_2(0, "currShell->shellType = %i\n",
898 currShell->shellType);
899 do_error_exit("error in compute_integral_from_points");
900 return -1;
901 } /* END SWITCH shellType */
902
903 } /* END FOR pointNo */
904
905 } /* END FOR i (for each shell) */
906
907 if(count != noOfNonzeroBasFuncs)
908 {
909 do_error_exit("error in compute_integral_from_points: "
910 "(count != noOfNonzeroBasFuncs)");
911 return -1;
912 }
913
914 /* now use density matrix to obtain final result */
915 result = 0;
916 for(i = 0; i < noOfNonzeroBasFuncs; i++)
917 {
918 iIndex = nonZeroBasFuncsIndexList[i];
919 for(j = i; j < noOfNonzeroBasFuncs; j++)
920 {
921 if(j == i)
922 symmetryFactor = 1;
923 else
924 symmetryFactor = 2;
925 jIndex = nonZeroBasFuncsIndexList[j];
926 /*real *restrict ci = &workList[i*nPoints]; */
927 /*real *restrict cj = &workList[j*nPoints]; */
928 /*real *restrict wt = weight; */
929 sum = 0;
930 for(pointNo = 0; pointNo < nPoints; pointNo++)
931 {
932 sum += workList[i*nPoints+pointNo] *
933 workList[j*nPoints+pointNo] * weight[pointNo];
934 /*sum += ci[pointNo] * cj[pointNo] * wt[pointNo]; */
935 } /* END FOR pointNo */
936
937 result += symmetryFactor * dmat[iIndex*nbast+jIndex] * sum;
938 } /* END FOR j */
939 } /* END FOR i */
940
941 return result;
942 #endif
943 } /* END compute_integral_from_points */
944
945
946
947 static real
to_power(real x,integer n)948 to_power(real x, integer n)
949 {
950 real result;
951 integer i;
952 result = 1;
953 for(i = 0; i < n; i++)
954 result *= x;
955 return result;
956 }
957
958
959 static real
compute_1d_gaussian_integral_recursive(real a,real b,integer n,real alpha)960 compute_1d_gaussian_integral_recursive(real a, real b, integer n, real alpha)
961 {
962 real result, sqrtalpha, term1, term2;
963 real aToPowerNminus1, bToPowerNminus1;
964 if(n == 0)
965 {
966 sqrtalpha = sqrt(alpha);
967 result = sqrt(CONSTPI/(4*alpha)) * (erf(sqrtalpha*b) - erf(sqrtalpha*a));
968 return result;
969 }
970 if(n == 1)
971 {
972 result = -(1 / (2*alpha)) * (exp(-alpha*b*b) - exp(-alpha*a*a));
973 return result;
974 }
975 if(n < 0)
976 {
977 do_output_2(0, "error in 1dintegral: n < 0");
978 return 0;
979 }
980 /* now we know that n >= 2 */
981 term1 = (n - 1) * compute_1d_gaussian_integral_recursive(a, b, n-2, alpha);
982 aToPowerNminus1 = to_power(a, n-1);
983 bToPowerNminus1 = to_power(b, n-1);
984 term2 =
985 bToPowerNminus1 * exp(-alpha*b*b) -
986 aToPowerNminus1 * exp(-alpha*a*a);
987 result = (term1 - term2) / (2 * alpha);
988 /* return 0; */
989 return result;
990 } /* END compute_1d_gaussian_integral_recursive */
991
992
993
994 static real
compute_1d_gaussian_integral(real a,real b,integer n,real alpha)995 compute_1d_gaussian_integral(real a, real b, integer n, real alpha)
996 {
997 real result, sqrtalpha, term1, term2;
998 return compute_1d_gaussian_integral_recursive(a, b, n, alpha);
999 result = 0;
1000 switch(n)
1001 {
1002 case 0:
1003 sqrtalpha = sqrt(alpha);
1004 result = sqrt(CONSTPI/(4*alpha)) * (erf(sqrtalpha*b) - erf(sqrtalpha*a));
1005 break;
1006 case 1:
1007 result = -(1 / (2*alpha)) * (exp(-alpha*b*b) - exp(-alpha*a*a));
1008 break;
1009 case 2:
1010 sqrtalpha = sqrt(alpha);
1011 term1 =
1012 sqrt(CONSTPI/(16*alpha*alpha*alpha)) *
1013 (erf(sqrtalpha*b) - erf(sqrtalpha*a));
1014 term2 = -(1 / (2 * alpha)) * (b*exp(-alpha*b*b) - a*exp(-alpha*a*a));
1015 result = term1 + term2;
1016 break;
1017 case 3:
1018 result = -(1 / (2*alpha*alpha)) * ((1+alpha*b*b)*exp(-alpha*b*b) -
1019 (1+alpha*a*a)*exp(-alpha*a*a));
1020 break;
1021 default:
1022 compute_1d_gaussian_integral_recursive(a, b, n, alpha);
1023 break;
1024 } /* END SWITCH n */
1025 return result;
1026 } /* END compute_1d_gaussian_integral */
1027
1028
1029 static real
compute_integral_over_box(DistributionSpecStruct * distr,BoxStruct * box)1030 compute_integral_over_box(DistributionSpecStruct* distr, BoxStruct* box)
1031 {
1032 real result, a, b, alpha;
1033 integer i, n;
1034 result = distr->coeff;
1035 alpha = distr->exponent;
1036 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1037 {
1038 n = distr->monomialInts[i];
1039 a = box->min[i] - distr->centerCoords[i];
1040 b = box->max[i] - distr->centerCoords[i];
1041 result *= compute_1d_gaussian_integral(a, b, n, alpha);
1042 } /* END FOR i */
1043 return result;
1044 } /* END compute_integral_over_box */
1045
1046
1047 static integer
get_distrs_for_box(integer * resultList,rhoTreeNode * node,BoxStruct * inputBoxPtr)1048 get_distrs_for_box(integer* resultList, rhoTreeNode* node, BoxStruct* inputBoxPtr)
1049 {
1050 #define MAX_DEPTH 888
1051 integer n, i, overlap, currDepth;
1052 rhoTreeNode* nodeList[MAX_DEPTH];
1053 integer statusList[MAX_DEPTH];
1054 rhoTreeNode* currNode;
1055 BoxStruct box;
1056 BoxStruct* currBox;
1057
1058 memcpy(&box, inputBoxPtr, sizeof(BoxStruct));
1059
1060 n = 0;
1061 currDepth = 0;
1062 nodeList[0] = node;
1063 statusList[0] = 0;
1064 while(currDepth >= 0)
1065 {
1066 if(statusList[currDepth] == 2)
1067 currDepth--;
1068 else
1069 {
1070
1071 currNode = nodeList[currDepth];
1072 currBox = &currNode->box;
1073
1074 /* check for box overlap */
1075 overlap = 1;
1076 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1077 {
1078 if(currBox->min[i] > box.max[i])
1079 overlap = 0;
1080 if(currBox->max[i] < box.min[i])
1081 overlap = 0;
1082 } /* END FOR i */
1083 if(overlap == 0)
1084 currDepth--;
1085 else
1086 {
1087
1088 if(statusList[currDepth] == 0)
1089 {
1090 if(currNode->distrIndex >= 0)
1091 {
1092 resultList[n] = currNode->distrIndex;
1093 n++;
1094 currDepth--;
1095 }
1096 else
1097 {
1098 statusList[currDepth] = 1;
1099 currDepth++;
1100 statusList[currDepth] = 0;
1101 nodeList[currDepth] = currNode->child1;
1102 }
1103 } /* END IF status 0 */
1104 else
1105 {
1106 /* status is 1 */
1107 statusList[currDepth] = 2;
1108 currDepth++;
1109 statusList[currDepth] = 0;
1110 nodeList[currDepth] = currNode->child2;
1111 } /* END ELSE status 1 */
1112 }
1113 }
1114 } /* END WHILE (currDepth >= 0) */
1115
1116 return n;
1117 } /* END get_distrs_for_box */
1118
1119 static integer
use_cubature_rule(integer maxlen,real (* coor)[3],real * weight,BoxStruct * box,integer ruleNumber)1120 use_cubature_rule(integer maxlen,
1121 real (*coor)[3],
1122 real *weight,
1123 BoxStruct* box,
1124 integer ruleNumber)
1125 {
1126 real volume, diff0, diff1, diff2;
1127 real c0, c1, c2, a, b;
1128 real currCoords[3];
1129 integer Ngrid, currIndex;
1130 integer i, j, k, ii;
1131 real a0, a1, a2;
1132
1133 volume = 1;
1134 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1135 volume *= (box->max[i] - box->min[i]);
1136
1137 switch(ruleNumber)
1138 {
1139 case 1: /* single point in center of box */
1140 Ngrid = 1;
1141 if(Ngrid >= maxlen)
1142 {
1143 do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1144 return -1;
1145 }
1146 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1147 coor[0][i] = (box->max[i] + box->min[i]) / 2;
1148 weight[0] = volume;
1149 break;
1150 case 2: /* eight points towards corners of box */
1151 Ngrid = 8;
1152 if(Ngrid >= maxlen)
1153 {
1154 do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1155 return -1;
1156 }
1157 for(i = 0; i < Ngrid; i++)
1158 weight[i] = volume / 8;
1159 diff0 = box->max[0] - box->min[0];
1160 diff1 = box->max[1] - box->min[1];
1161 diff2 = box->max[2] - box->min[2];
1162 currIndex = 0;
1163 for(i = 0; i < 2; i++)
1164 {
1165 currCoords[0] = box->min[0] + 0.25*diff0 + 0.5*diff0*i;
1166 for(j = 0; j < 2; j++)
1167 {
1168 currCoords[1] = box->min[1] + 0.25*diff1 + 0.5*diff1*i;
1169 for(k = 0; k < 2; k++)
1170 {
1171 currCoords[2] = box->min[2] + 0.25*diff2 + 0.5*diff2*i;
1172 for(ii = 0; ii < 3; ii++)
1173 {
1174 coor[currIndex][ii] = currCoords[ii];
1175 } /* END FOR ii */
1176 currIndex++;
1177 } /* END FOR k */
1178 } /* END FOR j */
1179 } /* END FOR i */
1180 break;
1181 case 3: /* 14 point, degree 5 rule (Stroud 1971) */
1182 Ngrid = 14;
1183 if(Ngrid >= maxlen)
1184 {
1185 do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1186 return -1;
1187 }
1188 for(i = 0; i < 6; i++)
1189 weight[i] = volume * 0.88642659279778393 / 8;
1190 for(i = 6; i < 14; i++)
1191 weight[i] = volume * 0.33518005540166204 / 8;
1192 diff0 = box->max[0] - box->min[0];
1193 diff1 = box->max[1] - box->min[1];
1194 diff2 = box->max[2] - box->min[2];
1195 c0 = (box->max[0] + box->min[0]) / 2;
1196 c1 = (box->max[1] + box->min[1]) / 2;
1197 c2 = (box->max[2] + box->min[2]) / 2;
1198 a = 0.79582242575422146 * 0.5;
1199 b = 0.75878691063932814 * 0.5;
1200
1201 #define MACRO_3VECT(v,x,y,z) v[0]=x; v[1]=y; v[2]=z;
1202
1203 MACRO_3VECT(coor[0], c0-a*diff0, c1, c2 );
1204 MACRO_3VECT(coor[1], c0+a*diff0, c1, c2 );
1205 MACRO_3VECT(coor[2], c0 , c1-a*diff1, c2 );
1206 MACRO_3VECT(coor[3], c0 , c1+a*diff1, c2 );
1207 MACRO_3VECT(coor[4], c0 , c1 , c2-a*diff2);
1208 MACRO_3VECT(coor[5], c0 , c1 , c2+a*diff2);
1209
1210 MACRO_3VECT(coor[ 6], c0-b*diff0, c1-b*diff1, c2-b*diff2);
1211 MACRO_3VECT(coor[ 7], c0-b*diff0, c1-b*diff1, c2+b*diff2);
1212 MACRO_3VECT(coor[ 8], c0-b*diff0, c1+b*diff1, c2-b*diff2);
1213 MACRO_3VECT(coor[ 9], c0-b*diff0, c1+b*diff1, c2+b*diff2);
1214 MACRO_3VECT(coor[10], c0+b*diff0, c1-b*diff1, c2-b*diff2);
1215 MACRO_3VECT(coor[11], c0+b*diff0, c1-b*diff1, c2+b*diff2);
1216 MACRO_3VECT(coor[12], c0+b*diff0, c1+b*diff1, c2-b*diff2);
1217 MACRO_3VECT(coor[13], c0+b*diff0, c1+b*diff1, c2+b*diff2);
1218
1219 break;
1220
1221 case 4: /* 25 point, degree 5 rule (Stroud 1971) */
1222 Ngrid = 25;
1223 if(Ngrid >= maxlen)
1224 {
1225 do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1226 return -1;
1227 }
1228 weight[0] = volume * 1.6842105263157894 / 8;
1229 for(i = 1; i < 25; i++)
1230 weight[i] = volume * 0.26315789473684210 / 8;
1231
1232 diff0 = box->max[0] - box->min[0];
1233 diff1 = box->max[1] - box->min[1];
1234 diff2 = box->max[2] - box->min[2];
1235 c0 = (box->max[0] + box->min[0]) / 2;
1236 c1 = (box->max[1] + box->min[1]) / 2;
1237 c2 = (box->max[2] + box->min[2]) / 2;
1238 a = 0.47800981191507060 * 0.5;
1239 b = 0.89982215247931316 * 0.5;
1240
1241 MACRO_3VECT(coor[0], c0, c1, c2);
1242
1243 ii = 1;
1244 a0 = a;
1245 a1 = a;
1246 a2 = b;
1247 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1248 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1249 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1250 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1251 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1252 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1253 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1254 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1255 a0 = a;
1256 a1 = b;
1257 a2 = a;
1258 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1259 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1260 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1261 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1262 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1263 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1264 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1265 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1266 a0 = b;
1267 a1 = a;
1268 a2 = a;
1269 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1270 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1271 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1272 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1273 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1274 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1275 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1276 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1277
1278 break;
1279
1280 case 5: /* 27 point, degree 7 rule (Stroud 1971) */
1281 Ngrid = 27;
1282 if(Ngrid >= maxlen)
1283 {
1284 do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1285 return -1;
1286 }
1287 weight[0] = volume * 0.78807348274421057 / 8;
1288 for(i = 1; i < 7; i++)
1289 weight[i] = volume * 0.49936900230772032 / 8;
1290 for(i = 7; i < 19; i++)
1291 weight[i] = volume * 0.032303742334037395 / 8;
1292 for(i = 19; i < 27; i++)
1293 weight[i] = volume * 0.47850844942512734 / 8;
1294
1295 diff0 = box->max[0] - box->min[0];
1296 diff1 = box->max[1] - box->min[1];
1297 diff2 = box->max[2] - box->min[2];
1298 c0 = (box->max[0] + box->min[0]) / 2;
1299 c1 = (box->max[1] + box->min[1]) / 2;
1300 c2 = (box->max[2] + box->min[2]) / 2;
1301
1302 MACRO_3VECT(coor[0], c0, c1, c2);
1303 a = 0.84841801147225245 * 0.5;
1304 ii = 1;
1305 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2 ); ii++;
1306 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2 ); ii++;
1307 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2 ); ii++;
1308 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2 ); ii++;
1309 MACRO_3VECT(coor[ii], c0 , c1 , c2-a*diff2); ii++;
1310 MACRO_3VECT(coor[ii], c0 , c1 , c2+a*diff2); ii++;
1311 a = 1.1064128986267175 * 0.5;
1312 MACRO_3VECT(coor[ii], c0-a*diff0, c1-a*diff1, c2 ); ii++;
1313 MACRO_3VECT(coor[ii], c0-a*diff0, c1+a*diff1, c2 ); ii++;
1314 MACRO_3VECT(coor[ii], c0+a*diff0, c1-a*diff1, c2 ); ii++;
1315 MACRO_3VECT(coor[ii], c0+a*diff0, c1+a*diff1, c2 ); ii++;
1316 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2-a*diff2); ii++;
1317 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2+a*diff2); ii++;
1318 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2-a*diff2); ii++;
1319 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2+a*diff2); ii++;
1320 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2-a*diff2); ii++;
1321 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2+a*diff2); ii++;
1322 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2-a*diff2); ii++;
1323 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2+a*diff2); ii++;
1324 a0 = a1 = a2 = 0.65281647210169120 * 0.5;
1325 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1326 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1327 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1328 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1329 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1330 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1331 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1332 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1333
1334 break;
1335
1336 case 6: /* 32 point, degree 7 rule (Beckers 1992) */
1337 Ngrid = 32;
1338 if(Ngrid >= maxlen)
1339 {
1340 do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1341 return -1;
1342 }
1343 for(i = 0; i < 6; i++)
1344 weight[i] = volume * 0.14098806933910446 / 8;
1345 for(i = 6; i < 12; i++)
1346 weight[i] = volume * 0.53332245896607639 / 8;
1347 for(i = 12; i < 24; i++)
1348 weight[i] = volume * 0.049451452995044458 / 8;
1349 for(i = 24; i < 32; i++)
1350 weight[i] = volume * 0.42008992427854766 / 8;
1351
1352 diff0 = box->max[0] - box->min[0];
1353 diff1 = box->max[1] - box->min[1];
1354 diff2 = box->max[2] - box->min[2];
1355 c0 = (box->max[0] + box->min[0]) / 2;
1356 c1 = (box->max[1] + box->min[1]) / 2;
1357 c2 = (box->max[2] + box->min[2]) / 2;
1358
1359 ii = 0;
1360
1361 a = 1.0 * 0.5;
1362 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2 ); ii++;
1363 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2 ); ii++;
1364 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2 ); ii++;
1365 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2 ); ii++;
1366 MACRO_3VECT(coor[ii], c0 , c1 , c2-a*diff2); ii++;
1367 MACRO_3VECT(coor[ii], c0 , c1 , c2+a*diff2); ii++;
1368 a = 0.66289786904352112 * 0.5;
1369 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2 ); ii++;
1370 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2 ); ii++;
1371 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2 ); ii++;
1372 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2 ); ii++;
1373 MACRO_3VECT(coor[ii], c0 , c1 , c2-a*diff2); ii++;
1374 MACRO_3VECT(coor[ii], c0 , c1 , c2+a*diff2); ii++;
1375 a = 1.0306143700994171 * 0.5;
1376 MACRO_3VECT(coor[ii], c0-a*diff0, c1-a*diff1, c2 ); ii++;
1377 MACRO_3VECT(coor[ii], c0-a*diff0, c1+a*diff1, c2 ); ii++;
1378 MACRO_3VECT(coor[ii], c0+a*diff0, c1-a*diff1, c2 ); ii++;
1379 MACRO_3VECT(coor[ii], c0+a*diff0, c1+a*diff1, c2 ); ii++;
1380 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2-a*diff2); ii++;
1381 MACRO_3VECT(coor[ii], c0-a*diff0, c1 , c2+a*diff2); ii++;
1382 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2-a*diff2); ii++;
1383 MACRO_3VECT(coor[ii], c0+a*diff0, c1 , c2+a*diff2); ii++;
1384 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2-a*diff2); ii++;
1385 MACRO_3VECT(coor[ii], c0 , c1-a*diff1, c2+a*diff2); ii++;
1386 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2-a*diff2); ii++;
1387 MACRO_3VECT(coor[ii], c0 , c1+a*diff1, c2+a*diff2); ii++;
1388 a0 = a1 = a2 = 0.66713797405746656 * 0.5;
1389 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1390 MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1391 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1392 MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1393 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1394 MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1395 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1396 MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1397
1398 break;
1399
1400 default:
1401 do_output_2(0, "error: unknown cubature rule");
1402 return -1;
1403 } /* END SWITCH */
1404
1405 /*
1406 real testSum = 0;
1407 for(i = 0; i < Ngrid; i++)
1408 testSum += weight[i];
1409 printf("testSum = %22.11f\n", testSum);
1410 printf("volume = %22.11f\n\n", volume);
1411 */
1412
1413 return Ngrid;
1414 } /* END use_cubature_rule */
1415
1416
1417 static integer
compute_grid_for_box(compute_grid_for_box_params_struct * params,integer maxlen,real (* coor)[3],real * weight,BoxStruct * box,real analyticalIntegralValue,real * workList,real * totalIntegralResult)1418 compute_grid_for_box(compute_grid_for_box_params_struct* params,
1419 integer maxlen,
1420 real (*coor)[3],
1421 real *weight,
1422 BoxStruct* box,
1423 real analyticalIntegralValue,
1424 real* workList,
1425 real* totalIntegralResult)
1426 {
1427 #define MAX_NO_OF_TEST_POINTS 88
1428
1429 integer noOfGridPoints;
1430 integer Ngrid;
1431 integer i;
1432 real Iapprox, Iexact;
1433 BoxStruct box1;
1434 BoxStruct box2;
1435 integer bestcoord, nPoints1, nPoints2;
1436 real abserror, dist, maxdist, halfway;
1437 real IexactAbs;
1438 real analyticalIntegralBox1, analyticalIntegralBox2;
1439 integer splitBox;
1440
1441 /*return 0; */
1442
1443 /* Define Ngrid points inside box, with corresponding weights */
1444 /* this is where the 'cubature rule' is used */
1445 Ngrid = use_cubature_rule(maxlen, coor, weight, box, CUBATURE_RULE);
1446 if(Ngrid <= 0)
1447 {
1448 do_output_2(0, "error in use_cubature_rule");
1449 return -1;
1450 }
1451 noOfGridPoints = Ngrid;
1452
1453 /* compute approximate and exact integrals for box */
1454
1455 Iapprox = 0;
1456
1457 Iapprox = compute_integral_from_points(
1458 ¶ms->density,
1459 params->noOfNonzeroShells,
1460 params->nonZeroShellsIndexList,
1461 params->noOfNonzeroDistributions,
1462 params->nonZeroDistrIndexList,
1463 Ngrid,
1464 &coor[0],
1465 weight,
1466 workList);
1467
1468 Iexact = analyticalIntegralValue;
1469
1470 IexactAbs = Iexact;
1471 if(IexactAbs < 0) IexactAbs *= -1;
1472
1473 /* compute absolute error */
1474 abserror = Iexact - Iapprox;
1475 if(abserror < 0) abserror *= -1;
1476
1477 /* check if error is too large */
1478 splitBox = 1;
1479
1480 if((abserror*100) < params->maxerrorPerBox)
1481 splitBox = 0;
1482
1483 if((splitBox == 1) && (abserror < params->maxerrorPerBox))
1484 {
1485 /* it seems that the error is small enough. */
1486 /* however, this could be a coincidence. */
1487 /* to check, compare with denser grid */
1488 real testCoor[MAX_NO_OF_TEST_POINTS][3];
1489 real testWeight[MAX_NO_OF_TEST_POINTS];
1490 real testIapprox;
1491 integer Ngrid2, testAbsError;
1492
1493 Ngrid2 = use_cubature_rule(MAX_NO_OF_TEST_POINTS,
1494 testCoor, testWeight, box, CUBATURE_RULE_2);
1495 if(Ngrid2 <= 0)
1496 {
1497 do_output_2(0, "error in use_cubature_rule");
1498 return -1;
1499 }
1500
1501 testIapprox =
1502 compute_integral_from_points(
1503 ¶ms->density,
1504 params->noOfNonzeroShells,
1505 params->nonZeroShellsIndexList,
1506 params->noOfNonzeroDistributions,
1507 params->nonZeroDistrIndexList,
1508 Ngrid2,
1509 &testCoor[0],
1510 testWeight,
1511 workList);
1512 testAbsError = fabs(Iexact - testIapprox);
1513 /* we demand that the denser grid should give better result */
1514 /*printf("abserror = %66.55f\n", abserror); */
1515 /*printf("testAbsError = %66.55f\n\n", testAbsError); */
1516 if(testAbsError <= abserror)
1517 splitBox = 0;
1518
1519 /*splitBox = 0; */
1520 }
1521 if(splitBox == 1)
1522 {
1523 /* error too large, split box into box1 and box2 */
1524
1525 /* first determine in which coordinate direction to do the split */
1526 maxdist = 0;
1527 bestcoord = -1;
1528 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1529 {
1530 dist = box->max[i] - box->min[i];
1531 if(dist > maxdist)
1532 {
1533 maxdist = dist;
1534 bestcoord = i;
1535 }
1536 } /* END FOR i */
1537 if(bestcoord < 0)
1538 return -1;
1539 /* now create new boxes box1 and box2 */
1540 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1541 {
1542 if(i == bestcoord)
1543 {
1544 /* direction of split */
1545 halfway = (box->max[i] + box->min[i]) / 2;
1546 box1.min[i] = box->min[i];
1547 box1.max[i] = halfway;
1548 box2.min[i] = halfway;
1549 box2.max[i] = box->max[i];
1550 }
1551 else
1552 {
1553 /* other direction, simply copy bounds */
1554 box1.min[i] = box->min[i];
1555 box1.max[i] = box->max[i];
1556 box2.min[i] = box->min[i];
1557 box2.max[i] = box->max[i];
1558 }
1559 } /* END FOR i */
1560 /* now boxes box1 and box2 are now created */
1561
1562 analyticalIntegralBox1 = 0;
1563 for(i = 0; i < params->density.noOfDistributions; i++)
1564 analyticalIntegralBox1 +=
1565 compute_integral_over_box(¶ms->density.distrList[i], &box1);
1566
1567 analyticalIntegralBox2 =
1568 analyticalIntegralValue - analyticalIntegralBox1;
1569
1570 /* create grid points for box1 */
1571 nPoints1 = compute_grid_for_box(params,
1572 maxlen,
1573 coor,
1574 weight,
1575 &box1,
1576 analyticalIntegralBox1,
1577 workList,
1578 totalIntegralResult);
1579 if(nPoints1 < 0)
1580 return -1;
1581 /* create grid points for box2 */
1582 nPoints2 = compute_grid_for_box(params,
1583 maxlen-nPoints1,
1584 &coor[nPoints1],
1585 &weight[nPoints1],
1586 &box2,
1587 analyticalIntegralBox2,
1588 workList,
1589 totalIntegralResult);
1590 if(nPoints2 < 0)
1591 return -1;
1592 noOfGridPoints = nPoints1 + nPoints2;
1593 } /* END IF error too large */
1594 else
1595 {
1596 /* error acceptable, */
1597 /* the computed grid points for this box are good enough. */
1598 /* do nothing more, just return the number of points */
1599 *totalIntegralResult += Iapprox;
1600 }
1601
1602 return noOfGridPoints;
1603 } /* END compute_grid_for_box */
1604
1605
1606
1607 static rhoTreeNode*
BuildRhoTreeBranch(integer noOfDistributionsTot,DistributionSpecStruct * rho_alt_1,ShellSpecStruct * rho_alt_2,integer distrIndexListN,integer * distrIndexList,real targetRhoError)1608 BuildRhoTreeBranch(integer noOfDistributionsTot,
1609 DistributionSpecStruct* rho_alt_1,
1610 ShellSpecStruct* rho_alt_2,
1611 integer distrIndexListN,
1612 integer* distrIndexList,
1613 real targetRhoError)
1614 {
1615 rhoTreeNode* newNode;
1616 integer i, j, samePoint, n1, n2, bestCoord;
1617 BoxStruct tempBox;
1618 real currCoord, currDiff, maxDiff, limit, extent1, extent2, testCoord;
1619 rhoTreeNode* child1;
1620 rhoTreeNode* child2;
1621 integer* tempList;
1622 integer tempInt;
1623
1624 if(distrIndexListN < 1)
1625 {
1626 printf("error in BuildRhoTreeBranch: (distrIndexListN < 1), "
1627 "distrIndexListN = %i\n", distrIndexListN);
1628 return NULL;
1629 }
1630
1631 newNode = dal_malloc_safe(sizeof(rhoTreeNode));
1632
1633 /* compute bounding box for this node */
1634 if(rho_alt_1 != NULL)
1635 {
1636 if(get_distribution_box(&newNode->box,
1637 &rho_alt_1[distrIndexList[0]],
1638 targetRhoError) != 0)
1639 return NULL;
1640 }
1641 else
1642 {
1643 if(get_shell_box(&newNode->box, &rho_alt_2[distrIndexList[0]]) != 0)
1644 return NULL;
1645 }
1646 for(i = 1; i < distrIndexListN; i++)
1647 {
1648 if(rho_alt_1 != NULL)
1649 {
1650 if(get_distribution_box(&tempBox,
1651 &rho_alt_1[distrIndexList[i]],
1652 targetRhoError) != 0)
1653 return NULL;
1654 }
1655 else
1656 {
1657 if(get_shell_box(&tempBox, &rho_alt_2[distrIndexList[i]]) != 0)
1658 return NULL;
1659 }
1660 for(j = 0; j < NO_OF_DIMENSIONS; j++)
1661 {
1662 if(tempBox.min[j] < newNode->box.min[j])
1663 newNode->box.min[j] = tempBox.min[j];
1664 if(tempBox.max[j] > newNode->box.max[j])
1665 newNode->box.max[j] = tempBox.max[j];
1666 } /* END FOR j */
1667 } /* END FOR i */
1668
1669 /* check if only one distr */
1670 if(distrIndexListN == 1)
1671 {
1672 /* OK, this becomes a leaf node */
1673 newNode->child1 = NULL;
1674 newNode->child2 = NULL;
1675 newNode->distrIndex = distrIndexList[0];
1676 return newNode;
1677 }
1678
1679 /* There is more than one distribution */
1680 /* Get box that encloses all distributions */
1681 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1682 {
1683 if(rho_alt_1 != NULL)
1684 {
1685 tempBox.min[i] = rho_alt_1[distrIndexList[0]].centerCoords[i];
1686 tempBox.max[i] = rho_alt_1[distrIndexList[0]].centerCoords[i];
1687 }
1688 else
1689 {
1690 tempBox.min[i] = rho_alt_2[distrIndexList[0]].centerCoords[i];
1691 tempBox.max[i] = rho_alt_2[distrIndexList[0]].centerCoords[i];
1692 }
1693 } /* END FOR i */
1694 for(i = 1; i < distrIndexListN; i++)
1695 {
1696 for(j = 0; j < NO_OF_DIMENSIONS; j++)
1697 {
1698 if(rho_alt_1 != NULL)
1699 currCoord = rho_alt_1[distrIndexList[i]].centerCoords[j];
1700 else
1701 currCoord = rho_alt_2[distrIndexList[i]].centerCoords[j];
1702 if(tempBox.min[j] > currCoord) tempBox.min[j] = currCoord;
1703 if(tempBox.max[j] < currCoord) tempBox.max[j] = currCoord;
1704 } /* END FOR j */
1705 } /* END FOR i */
1706
1707 /* check if all distrs are at the same point */
1708
1709 bestCoord = -1;
1710 maxDiff = 0;
1711 for(i = 0; i < NO_OF_DIMENSIONS; i++)
1712 {
1713 currDiff = tempBox.max[i] - tempBox.min[i];
1714 if(currDiff > maxDiff)
1715 {
1716 bestCoord = i;
1717 maxDiff = currDiff;
1718 }
1719 } /* END FOR i */
1720 if(bestCoord < 0)
1721 samePoint = 1;
1722 else
1723 {
1724 if(maxDiff > 1.0e-13)
1725 {
1726 limit = (tempBox.max[bestCoord] + tempBox.min[bestCoord]) / 2;
1727 samePoint = 0;
1728 }
1729 else
1730 samePoint = 1;
1731 }
1732
1733 if(samePoint == 1)
1734 {
1735 /* all distrs are at the same point */
1736 /* sort by extent */
1737 /* bubble sort (this could be optimized) */
1738 for(i = 0; i < (distrIndexListN-1); i++)
1739 {
1740 for(j = 0; j < (distrIndexListN-1-i); j++)
1741 {
1742 if(rho_alt_1 != NULL)
1743 {
1744 extent1 = rho_alt_1[distrIndexList[j]].extent;
1745 extent2 = rho_alt_1[distrIndexList[j+1]].extent;
1746 }
1747 else
1748 {
1749 extent1 = rho_alt_2[distrIndexList[j]].extent;
1750 extent2 = rho_alt_2[distrIndexList[j+1]].extent;
1751 }
1752 if(extent1 > extent2)
1753 {
1754 /* do switch */
1755 tempInt = distrIndexList[j];
1756 distrIndexList[j] = distrIndexList[j+1];
1757 distrIndexList[j+1] = tempInt;
1758 } /* END IF SWITCH */
1759 } /* END FOR j bubble sort */
1760 } /* END FOR i bubble sort */
1761 /* check sort */
1762 for(i = 0; i < (distrIndexListN-1); i++)
1763 {
1764 if(rho_alt_1 != NULL)
1765 {
1766 extent1 = rho_alt_1[distrIndexList[i]].extent;
1767 extent2 = rho_alt_1[distrIndexList[i+1]].extent;
1768 }
1769 else
1770 {
1771 extent1 = rho_alt_2[distrIndexList[i]].extent;
1772 extent2 = rho_alt_2[distrIndexList[i+1]].extent;
1773 }
1774 if(extent1 > extent2)
1775 {
1776 do_output_2(0,
1777 "error in BuildRhoTreeBranch: list not sorted");
1778 return NULL;
1779 }
1780 } /* END FOR i check sort */
1781
1782 /* create 2 new boxes: small extent and large extent */
1783 n1 = distrIndexListN / 2;
1784 n2 = distrIndexListN - n1;
1785 }
1786 else
1787 {
1788 /* all distrs are NOT at the same point */
1789 limit = (tempBox.max[bestCoord] + tempBox.min[bestCoord]) / 2;
1790 tempList = dal_malloc_safe(distrIndexListN * sizeof(integer));
1791 n1 = 0;
1792 n2 = 0;
1793 for(i = 0; i < distrIndexListN; i++)
1794 {
1795 if(rho_alt_1 != NULL)
1796 testCoord = rho_alt_1[distrIndexList[i]].centerCoords[bestCoord];
1797 else
1798 testCoord = rho_alt_2[distrIndexList[i]].centerCoords[bestCoord];
1799 if(testCoord > limit)
1800 {
1801 tempList[n1] = distrIndexList[i];
1802 n1++;
1803 }
1804 else
1805 {
1806 tempList[distrIndexListN-1-n2] = distrIndexList[i];
1807 n2++;
1808 }
1809 } /* END FOR i */
1810 if((n1 == 0) || (n2 == 0))
1811 {
1812 printf("error in BuildRhoTreeBranch (after split): "
1813 "n1 = %i, n2 = %i\n", n1, n2);
1814 printf("maxDiff = %.55f\n", maxDiff);
1815 return NULL;
1816 }
1817
1818 memcpy(distrIndexList, tempList, distrIndexListN * sizeof(integer));
1819 dal_free(tempList);
1820 }
1821 if((n1 == 0) || (n2 == 0))
1822 {
1823 printf("error in BuildRhoTreeBranch: n1 = %i, n2 = %i\n", n1, n2);
1824 return NULL;
1825 }
1826 child1 = BuildRhoTreeBranch(noOfDistributionsTot, rho_alt_1, rho_alt_2,
1827 n1, distrIndexList, targetRhoError);
1828 if(child1 == NULL)
1829 return NULL;
1830 child2 = BuildRhoTreeBranch(noOfDistributionsTot, rho_alt_1, rho_alt_2,
1831 n2, distrIndexList + n1, targetRhoError);
1832 if(child2 == NULL)
1833 return NULL;
1834 newNode->child1 = child1;
1835 newNode->child2 = child2;
1836 newNode->distrIndex = -1;
1837
1838 return newNode;
1839 } /* END */
1840
1841
1842 static rhoTreeNode*
BuildRhoTree(integer noOfDistributions,DistributionSpecStruct * rho_alt_1,ShellSpecStruct * rho_alt_2,real targetRhoError)1843 BuildRhoTree(integer noOfDistributions,
1844 DistributionSpecStruct* rho_alt_1,
1845 ShellSpecStruct* rho_alt_2,
1846 real targetRhoError)
1847 {
1848 rhoTreeNode* rootNode;
1849 integer* distrIndexList;
1850 integer i;
1851 real targetError, arg, r1;
1852 DistributionSpecStruct* distr;
1853
1854 if(rho_alt_1 != NULL)
1855 {
1856 /* compute extent for each distribution in list */
1857 for(i = 0; i < noOfDistributions; i++)
1858 {
1859 distr = &rho_alt_1[i];
1860 targetError = distr->coeff / 1e20;
1861 arg = distr->coeff / targetError;
1862 r1 = log(arg);
1863 if(r1 < 0) r1 *= -1;
1864 distr->extent = sqrt(r1 / distr->exponent);
1865 } /* END FOR i */
1866 }
1867
1868 /* set up initial index list: all distributions included */
1869 distrIndexList = dal_malloc_safe(noOfDistributions * sizeof(integer));
1870 for(i = 0; i < noOfDistributions; i++)
1871 distrIndexList[i] = i;
1872
1873 rootNode = BuildRhoTreeBranch(noOfDistributions, rho_alt_1, rho_alt_2,
1874 noOfDistributions, distrIndexList,
1875 targetRhoError);
1876
1877 if(rootNode == NULL)
1878 {
1879 do_output_2(0, "error in BuildRhoTreeBranch");
1880 return NULL;
1881 }
1882
1883 dal_free(distrIndexList);
1884
1885 return rootNode;
1886 } /* END BuildRhoTree */
1887
1888
1889
1890
1891
1892
free_rho_tree_memory(rhoTreeNode * rootNode)1893 static void free_rho_tree_memory(rhoTreeNode* rootNode)
1894 {
1895 rhoTreeNode* child1;
1896 rhoTreeNode* child2;
1897 child1 = rootNode->child1;
1898 child2 = rootNode->child2;
1899 if(child1 != NULL)
1900 free_rho_tree_memory(child1);
1901 if(child2 != NULL)
1902 free_rho_tree_memory(child2);
1903 dal_free(rootNode);
1904 } /* END free_rho_tree_memory */
1905
1906
round_real(real x)1907 static integer round_real(real x)
1908 {
1909 integer x1, x2;
1910 real err1, err2;
1911
1912 x1 = (integer)x;
1913 x2 = x1 + 1;
1914 err1 = x - (real)x1;
1915 err2 = (real)x2 - x;
1916 if(err1 <= err2)
1917 return x1;
1918 else
1919 return x2;
1920 }
1921
1922
1923
1924 void*
compute_grid_thread_func(void * arg)1925 compute_grid_thread_func(void* arg)
1926 {
1927 /* char s[888]; */
1928 integer maxNoOfPoints;
1929 real (*coor)[3];
1930 real* weight;
1931 real* coorx;
1932 real* coory;
1933 real* coorz;
1934 integer noOfShells, noOfDistributions;
1935 integer noOfNonzeroShells;
1936 integer* nonZeroShellIndexList;
1937 integer noOfNonzeroDistributions;
1938 integer* nonZeroDistrIndexList;
1939 integer currShellNo, prevShellNo, tempInt;
1940 integer* listShlblocks;
1941 compute_grid_for_box_params_struct paramsStruct;
1942 real* workList;
1943 real totalIntegralResult;
1944 integer writeResultsToFile;
1945 DistributionSpecStruct* rhoForSubBox;
1946 integer noOfWrittenBatches, noOfGridPoints;
1947 BoxStruct startBox;
1948 BoxStruct subBox;
1949 DensitySpecStruct* density;
1950 compute_grid_thread_func_struct* inputParams;
1951 rhoTreeNode* rhoTreeRootNode;
1952 rhoTreeNode* rhoTreeRootNodeShells;
1953 integer* tempList;
1954 integer i, j, k, m, ii, jj, kk;
1955 integer Nx, Ny, Nz;
1956 integer nFunctions, count, nPoints, nblocks, blockStarted;
1957 integer startShellNo, NthisWrite;
1958 real Iexact, maxerror;
1959 FILE* gridFile;
1960 integer jobCount, assignedJobNumber;
1961 ShellSpecStruct* currShell;
1962
1963 /* get hold of input params */
1964 inputParams = (compute_grid_thread_func_struct*)arg;
1965 density = inputParams->density;
1966 memcpy(&startBox, inputParams->startBox, sizeof(BoxStruct));
1967 rhoTreeRootNode = inputParams->rhoTreeRootNode;
1968 rhoTreeRootNodeShells = inputParams->rhoTreeRootNodeShells;
1969 noOfShells = density->noOfShells;
1970 noOfDistributions = density->noOfDistributions;
1971 Nx = inputParams->Nx;
1972 Ny = inputParams->Ny;
1973 Nz = inputParams->Nz;
1974 maxerror = inputParams->maxerror;
1975 gridFile = inputParams->gridFile;
1976
1977 writeResultsToFile = 1;
1978
1979 do_output_2(2, "thread %i entering compute_grid_thread_func..",
1980 inputParams->threadNo);
1981
1982 /* allocate memory */
1983 maxNoOfPoints = FILE_BATCH_N;
1984 coor = dal_malloc_safe(3 * maxNoOfPoints * sizeof(real));
1985 weight = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1986 coorx = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1987 coory = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1988 coorz = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1989 nonZeroShellIndexList = dal_malloc_safe(noOfShells * sizeof(integer));
1990 nonZeroDistrIndexList = dal_malloc_safe(density->nbast * sizeof(integer));
1991 workList = dal_malloc_safe(density->nbast *
1992 MAX_NO_OF_POINTS_PER_BATCH * sizeof(real));
1993 rhoForSubBox = dal_malloc_safe(noOfDistributions *
1994 sizeof(DistributionSpecStruct));
1995 tempList = dal_malloc_safe(noOfDistributions * sizeof(integer));
1996 listShlblocks = dal_malloc_safe(MAX_NO_OF_SHLBLOCKS * 2 * sizeof(integer));
1997
1998
1999 /* get initial assignedJobNumber */
2000 #ifdef USE_PTHREADS
2001 pthread_mutex_lock(inputParams->jobMutex);
2002 #endif
2003 assignedJobNumber = *inputParams->currJobNumber;
2004 *inputParams->currJobNumber += N_BATCH_JOBS;
2005 #ifdef USE_PTHREADS
2006 pthread_mutex_unlock(inputParams->jobMutex);
2007 #endif
2008 jobCount = 0;
2009 noOfWrittenBatches = 0;
2010 totalIntegralResult = 0;
2011 noOfGridPoints = 0;
2012 i = j = k = 0;
2013 for(i = 0; i < Nx; i++)
2014 {
2015 for(j = 0; j < Ny; j++)
2016 {
2017 for(k = 0; k < Nz; k++)
2018 {
2019 jobCount++;
2020 if(jobCount >= (assignedJobNumber + N_BATCH_JOBS))
2021 {
2022 /* get new assignedJobNumber */
2023 #ifdef USE_PTHREADS
2024 pthread_mutex_lock(inputParams->jobMutex);
2025 #endif
2026 assignedJobNumber = *inputParams->currJobNumber;
2027 *inputParams->currJobNumber += N_BATCH_JOBS;
2028 #ifdef USE_PTHREADS
2029 pthread_mutex_unlock(inputParams->jobMutex);
2030 #endif
2031 }
2032 if(jobCount < assignedJobNumber)
2033 continue;
2034 if(assignedJobNumber > (Nx*Ny*Nz))
2035 continue;
2036 /*printf("i = %i, j = %i, k = %i\n", i, j, k); */
2037
2038 /* determine current sub-box */
2039 subBox.min[0] = startBox.min[0] + (real)(i + 0) *
2040 (startBox.max[0] - startBox.min[0]) / Nx;
2041 subBox.max[0] = startBox.min[0] + (real)(i + 1) *
2042 (startBox.max[0] - startBox.min[0]) / Nx;
2043 subBox.min[1] = startBox.min[1] + (real)(j + 0) *
2044 (startBox.max[1] - startBox.min[1]) / Ny;
2045 subBox.max[1] = startBox.min[1] + (real)(j + 1) *
2046 (startBox.max[1] - startBox.min[1]) / Ny;
2047 subBox.min[2] = startBox.min[2] + (real)(k + 0) *
2048 (startBox.max[2] - startBox.min[2]) / Nz;
2049 subBox.max[2] = startBox.min[2] + (real)(k + 1) *
2050 (startBox.max[2] - startBox.min[2]) / Nz;
2051
2052 /* get list of non-zero shells for current sub-box */
2053 noOfNonzeroShells = get_distrs_for_box(nonZeroShellIndexList,
2054 rhoTreeRootNodeShells,
2055 &subBox);
2056 if(noOfNonzeroShells < 0)
2057 {
2058 do_output_2(0, "error in get_distrs_for_box");
2059 return NULL;
2060 }
2061 if(noOfNonzeroShells == 0)
2062 continue;
2063
2064 /* sort list of non-zero shells (bubble sort, could be optimized) */
2065 for(kk = 0; kk < (noOfNonzeroShells - 1); kk++)
2066 {
2067 for(jj = 0; jj < (noOfNonzeroShells - 1 - kk); jj++)
2068 {
2069 if(nonZeroShellIndexList[jj] >
2070 nonZeroShellIndexList[jj+1])
2071 {
2072 tempInt = nonZeroShellIndexList[jj];
2073 nonZeroShellIndexList[jj] =
2074 nonZeroShellIndexList[jj+1];
2075 nonZeroShellIndexList[jj+1] = tempInt;
2076 }
2077 } /* END FOR jj */
2078 } /* END FOR kk */
2079
2080 /* translate list of nonzero shells to list of */
2081 /* nonzero contracted distributions */
2082 noOfNonzeroDistributions = 0;
2083 for(kk = 0; kk < noOfNonzeroShells; kk++)
2084 {
2085 currShell = &density->shellList[nonZeroShellIndexList[kk]];
2086 nFunctions = 1 + 2 *
2087 currShell->shellType;
2088 for(ii = 0; ii < nFunctions; ii++)
2089 {
2090 nonZeroDistrIndexList[noOfNonzeroDistributions] =
2091 currShell->startIndexInMatrix + ii;
2092 noOfNonzeroDistributions++;
2093 } /* END FOR ii */
2094 } /* END FOR kk */
2095 if(noOfNonzeroDistributions > density->nbast)
2096 {
2097 do_output_2(0, "error: (noOfNonzeroDistributions > nbast)");
2098 return NULL;
2099 }
2100
2101 /* get list of relevant distributions for sub-box */
2102 count = get_distrs_for_box(tempList, rhoTreeRootNode, &subBox);
2103 if(count < 0)
2104 {
2105 do_output_2(0, "error in get_distrs_for_box");
2106 return NULL;
2107 }
2108 if(count == 0)
2109 continue;
2110
2111 for(m = 0; m < count; m++)
2112 memcpy(&rhoForSubBox[m],
2113 &density->distrList[tempList[m]],
2114 sizeof(DistributionSpecStruct));
2115
2116 Iexact = 0;
2117 for(kk = 0; kk < count; kk++)
2118 Iexact +=
2119 compute_integral_over_box(&rhoForSubBox[kk], &subBox);
2120
2121 /* create grid for sub-box */
2122
2123 memcpy(¶msStruct.density,
2124 density,
2125 sizeof(DensitySpecStruct));
2126 paramsStruct.density.noOfDistributions = count;
2127 paramsStruct.density.distrList = rhoForSubBox;
2128
2129 paramsStruct.maxerrorPerBox = maxerror;
2130 paramsStruct.nonZeroDistrIndexList = nonZeroDistrIndexList;
2131 paramsStruct.noOfNonzeroDistributions = noOfNonzeroDistributions;
2132 paramsStruct.nonZeroShellsIndexList = nonZeroShellIndexList;
2133 paramsStruct.noOfNonzeroShells = noOfNonzeroShells;
2134
2135 nPoints = compute_grid_for_box(¶msStruct,
2136 maxNoOfPoints,
2137 &coor[0],
2138 &weight[0],
2139 &subBox,
2140 Iexact,
2141 workList,
2142 &totalIntegralResult);
2143 if(nPoints < 0)
2144 {
2145 do_output_2(0, "error in compute_grid_for_box");
2146 return NULL;
2147 }
2148
2149 if(nPoints == 0)
2150 continue;
2151
2152 noOfGridPoints += nPoints;
2153
2154 if(writeResultsToFile == 1)
2155 {
2156 integer nPointsLeft;
2157 /* make block-list of non-zero shells to write to file */
2158 nblocks = 0;
2159 blockStarted = 0;
2160 startShellNo = -1;
2161 prevShellNo = -1;
2162 for(kk = 0; kk < noOfNonzeroShells; kk++)
2163 {
2164 currShellNo = nonZeroShellIndexList[kk];
2165 if(blockStarted == 0)
2166 {
2167 blockStarted = 1;
2168 startShellNo = currShellNo;
2169 }
2170 else
2171 {
2172 if(currShellNo != (prevShellNo + 1))
2173 {
2174 /* register previous block */
2175 listShlblocks[nblocks*2] = startShellNo+1;
2176 listShlblocks[nblocks*2+1] = prevShellNo+1;
2177 nblocks++;
2178 startShellNo = currShellNo;
2179 }
2180 }
2181 prevShellNo = currShellNo;
2182 } /* END FOR kk */
2183 if(blockStarted == 1)
2184 {
2185 /* register previous block */
2186 listShlblocks[nblocks*2] = startShellNo+1;
2187 listShlblocks[nblocks*2+1] = prevShellNo+1;
2188 nblocks++;
2189 }
2190
2191 /* set up separate x, y, z vectors for writing to file */
2192 for(kk = 0; kk < nPoints; kk++)
2193 {
2194 coorx[kk] = coor[kk][0];
2195 coory[kk] = coor[kk][1];
2196 coorz[kk] = coor[kk][2];
2197 }
2198
2199 /* write grid points to file */
2200 nPointsLeft = nPoints;
2201 #ifdef USE_PTHREADS
2202 pthread_mutex_lock(inputParams->fileMutex);
2203 #endif
2204 while(nPointsLeft > 0)
2205 {
2206 if(nPointsLeft <= MAX_NO_OF_POINTS_PER_WRITE)
2207 NthisWrite = nPointsLeft;
2208 else
2209 NthisWrite = MAX_NO_OF_POINTS_PER_WRITE;
2210 fwrite(&NthisWrite, sizeof(integer), 1, gridFile);
2211 fwrite(&nblocks, sizeof(integer), 1, gridFile);
2212 fwrite(listShlblocks, sizeof(integer), 2*nblocks, gridFile);
2213 #if 1
2214 fwrite(&(coor[nPoints-nPointsLeft][0]),
2215 sizeof(real), 3*NthisWrite, gridFile);
2216 #else
2217 fwrite(&coorx[nPoints-nPointsLeft],
2218 sizeof(real), NthisWrite, gridFile);
2219 fwrite(&coory[nPoints-nPointsLeft],
2220 sizeof(real), NthisWrite, gridFile);
2221 fwrite(&coorz[nPoints-nPointsLeft],
2222 sizeof(real), NthisWrite, gridFile);
2223 #endif
2224 fwrite(&weight[nPoints-nPointsLeft],
2225 sizeof(real), NthisWrite, gridFile);
2226 nPointsLeft -= NthisWrite;
2227 noOfWrittenBatches++;
2228 } /* END WHILE points left */
2229 #ifdef USE_PTHREADS
2230 pthread_mutex_unlock(inputParams->fileMutex);
2231 #endif
2232 } /* END IF (gridFile != NULL) */
2233 } /* END FOR k */
2234 } /* END FOR j */
2235 } /* END FOR i */
2236
2237
2238 do_output_2(2, "thread %i loops done, freeing memory..",
2239 inputParams->threadNo);
2240
2241 /* free memory */
2242 dal_free(coor);
2243 dal_free(weight);
2244 dal_free(coorx);
2245 dal_free(coory);
2246 dal_free(coorz);
2247 dal_free(nonZeroShellIndexList);
2248 dal_free(nonZeroDistrIndexList);
2249 dal_free(workList);
2250 dal_free(rhoForSubBox);
2251 dal_free(tempList);
2252 dal_free(listShlblocks);
2253
2254 do_output_2(2, "thread %i mem freed OK, setting result params..",
2255 inputParams->threadNo);
2256
2257 /* report results through input structure */
2258 inputParams->noOfPoints = noOfGridPoints;
2259 inputParams->noOfWrittenBatches = noOfWrittenBatches;
2260 inputParams->integralResult = totalIntegralResult;
2261
2262 do_output_2(2, "thread %i exiting compute_grid_thread_func",
2263 inputParams->threadNo);
2264
2265 return NULL;
2266 } /* END compute_grid_thread_func */
2267
2268
2269
2270 int
do_test_integration(DensitySpecStruct * density,char * gridFileName)2271 do_test_integration(DensitySpecStruct* density, char* gridFileName)
2272 {
2273 real (*coor)[3];
2274 real* weight;
2275 real* coorx;
2276 real* coory;
2277 real* coorz;
2278 integer noOfNonzeroShells;
2279 integer* nonZeroShellIndexList;
2280 integer noOfNonzeroDistributions;
2281 integer* nonZeroDistrIndexList;
2282 integer finished, currIndex;
2283 integer* listShlblocks;
2284 real* workList;
2285 integer nRepeats, repeatNo;
2286 real testIntegralResult;
2287 integer maxNoOfPoints, noOfShells, noOfDistributions;
2288 /* char s[888]; */
2289 time_t startSeconds2, endSeconds2;
2290 clock_t startClock, endClock;
2291 FILE* gridFile;
2292 integer nPoints, nblocks, ii, kk, nFunctions;
2293 integer nPointsDone, nPointsThisTime, nBatches;
2294 real secondsTakenReal;
2295
2296
2297 noOfShells = density->noOfShells;
2298 if(noOfShells <= 0)
2299 return -1;
2300
2301 noOfDistributions = density->noOfDistributions;
2302 if(noOfDistributions <= 0)
2303 return -1;
2304
2305 /* allocate memory */
2306 maxNoOfPoints = FILE_BATCH_N;
2307 coor = dal_malloc_safe(3 * maxNoOfPoints * sizeof(real));
2308 weight = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2309 coorx = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2310 coory = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2311 coorz = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2312 nonZeroShellIndexList = dal_malloc_safe(noOfShells * sizeof(integer));
2313 nonZeroDistrIndexList = dal_malloc_safe(density->nbast * sizeof(integer));
2314 workList = dal_malloc_safe(density->nbast *
2315 MAX_NO_OF_POINTS_PER_BATCH * sizeof(real));
2316 listShlblocks = dal_malloc_safe(MAX_NO_OF_SHLBLOCKS * 2 * sizeof(integer));
2317
2318
2319 nRepeats = 1;
2320 do_output_2(2, "grid created OK, computing integral to check time, "
2321 "nRepeats = %i", nRepeats);
2322
2323 time(&startSeconds2);
2324
2325 for(repeatNo = 0; repeatNo < nRepeats; repeatNo++)
2326 {
2327 testIntegralResult = 0;
2328 startClock = clock();
2329 gridFile = fopen(gridFileName, "rb");
2330 if(gridFile == NULL)
2331 {
2332 do_output_2(0, "error opening grid file for reading");
2333 return -1;
2334 }
2335 finished = 0;
2336 nBatches = 0;
2337 while(finished == 0)
2338 {
2339 if(fread(&nPoints, sizeof(integer), 1, gridFile) != 1)
2340 finished = 1;
2341 else
2342 {
2343 if(fread(&nblocks, sizeof(integer), 1, gridFile) != 1)
2344 {
2345 do_output_2(0, "error reading nblocks");
2346 return -1;
2347 }
2348 if(fread(listShlblocks, sizeof(integer), 2*nblocks, gridFile)
2349 != (2*nblocks))
2350 {
2351 do_output_2(0, "error reading blocks");
2352 return -1;
2353 }
2354 if(fread(coor, sizeof(real), 3*nPoints, gridFile) != 3*nPoints)
2355 return -1;
2356 /*
2357 if(fread(coorx, sizeof(real), nPoints, gridFile) != nPoints)
2358 return -1;
2359 if(fread(coory, sizeof(real), nPoints, gridFile) != nPoints)
2360 return -1;
2361 if(fread(coorz, sizeof(real), nPoints, gridFile) != nPoints)
2362 return -1;
2363 for(kk = 0; kk < nPoints; kk++)
2364 {
2365 coor[kk][0] = coorx[kk];
2366 coor[kk][1] = coory[kk];
2367 coor[kk][2] = coorz[kk];
2368 }
2369 */
2370 if(fread(&weight[0], sizeof(real), nPoints, gridFile) != nPoints)
2371 return -1;
2372
2373
2374 noOfNonzeroShells = 0;
2375 for(kk = 0; kk < nblocks; kk++)
2376 {
2377 currIndex = listShlblocks[kk*2];
2378 while(currIndex <= listShlblocks[kk*2+1])
2379 {
2380 nonZeroShellIndexList[noOfNonzeroShells] = currIndex-1;
2381 noOfNonzeroShells++;
2382 currIndex++;
2383 } /* END WHILE */
2384 } /* END FOR kk */
2385
2386 /* translate list of nonzero shells to list of nonzero
2387 * contracted distributions */
2388 noOfNonzeroDistributions = 0;
2389 for(kk = 0; kk < noOfNonzeroShells; kk++)
2390 {
2391 nFunctions = 1 + 2 * density->shellList[nonZeroShellIndexList[kk]].shellType;
2392 for(ii = 0; ii < nFunctions; ii++)
2393 {
2394 nonZeroDistrIndexList[noOfNonzeroDistributions] =
2395 density->shellList[nonZeroShellIndexList[kk]].startIndexInMatrix + ii;
2396 noOfNonzeroDistributions++;
2397 } /* END FOR ii */
2398 } /* END FOR kk */
2399
2400 nPointsDone = 0;
2401 while(nPointsDone < nPoints)
2402 {
2403 if((nPoints - nPointsDone) > MAX_NO_OF_POINTS_PER_BATCH)
2404 nPointsThisTime = MAX_NO_OF_POINTS_PER_BATCH;
2405 else
2406 nPointsThisTime = nPoints - nPointsDone;
2407
2408 testIntegralResult +=
2409 compute_integral_from_points(density,
2410 noOfNonzeroShells,
2411 nonZeroShellIndexList,
2412 noOfNonzeroDistributions,
2413 nonZeroDistrIndexList,
2414 nPointsThisTime,
2415 &coor[nPointsDone],
2416 &weight[nPointsDone],
2417 workList);
2418 nPointsDone += nPointsThisTime;
2419 } /* END WHILE (nPointsDone < nPoints) */
2420
2421 #if 0
2422 for(kk = 0; kk < nPoints; kk++)
2423 {
2424 testIntegralResult +=
2425 compute_value_at_point_2(nonZeroShellIndexList,
2426 noOfNonzeroShells,
2427 shellList2,
2428 noOfShells,
2429 nbast,
2430 dmat,
2431 &coor[kk],
2432 nonZeroDistrIndexList,
2433 noOfNonzeroDistributions)
2434 * weight[kk];
2435 } /* END FOR kk */
2436 #endif
2437
2438 nBatches++;
2439 } /* END ELSE nPoints read OK */
2440 } /* END WHILE more in file */
2441 endClock = clock();
2442 secondsTakenReal = (real)(endClock - startClock) / CLOCKS_PER_SEC;
2443 do_output_2(2, "integral computed, result: %.11f, "
2444 "took %.3f s (time value may overflow for > 1000 s)",
2445 testIntegralResult, secondsTakenReal);
2446 do_output_2(2, "nBatches from test integration: %i", nBatches);
2447 fclose(gridFile);
2448 } /* END FOR repeatNo */
2449
2450 time(&endSeconds2);
2451 do_output_2(2, "time for test integration: %i s",
2452 (integer)(endSeconds2 - startSeconds2));
2453
2454 /* free memory */
2455 dal_free(coor);
2456 dal_free(weight);
2457 dal_free(workList);
2458 dal_free(coorx);
2459 dal_free(coory);
2460 dal_free(coorz);
2461 dal_free(nonZeroShellIndexList);
2462 dal_free(nonZeroDistrIndexList);
2463 dal_free(listShlblocks);
2464
2465 return 0;
2466 } /* END do_test_integration */
2467
2468
compute_grid(DensitySpecStruct * density,real maxerror,real boxdist,real targetRhoError,char * gridFileName,integer noOfThreads,integer doTestIntegration)2469 integer compute_grid(
2470 DensitySpecStruct* density,
2471 real maxerror,
2472 real boxdist,
2473 real targetRhoError,
2474 char* gridFileName,
2475 integer noOfThreads,
2476 integer doTestIntegration
2477 )
2478 {
2479 FILE* gridFile;
2480 char ss[888];
2481 char sss[888];
2482 integer noOfGridPoints;
2483 BoxStruct startBox;
2484 BoxStruct tempBox;
2485 integer i, j;
2486 rhoTreeNode* rhoTreeRootNode;
2487 rhoTreeNode* rhoTreeRootNodeShells;
2488 clock_t startTime, timeTaken;
2489 time_t startSeconds, endSeconds;
2490 real Iexact, absRelError;
2491 integer Nxyz[3]; /* Nx Ny Nz */
2492 integer Nx, Ny, Nz;
2493 integer IexactInteger;
2494 integer correctValueInt;
2495 real correctValue, abserrorRel;
2496 integer noOfWrittenBatches;
2497 real totalIntegralResult;
2498 integer noOfDistributions, writeResultsToFile;
2499 integer currJobNumber, noOfShells;
2500 real megaBytes;
2501 compute_grid_thread_func_struct* threadParamsList;
2502
2503 do_output_2(2, "entering compute_grid..\n");
2504
2505 noOfShells = density->noOfShells;
2506 if(noOfShells <= 0)
2507 return -1;
2508
2509 noOfDistributions = density->noOfDistributions;
2510 if(noOfDistributions <= 0)
2511 return -1;
2512
2513
2514 make_float_string(ss, maxerror);
2515 make_float_string(sss, targetRhoError);
2516 do_output_2(2, "Entering compute_grid, noOfDistributions = %i, "
2517 "maxerror = %s, targetRhoError = %s",
2518 noOfDistributions, ss, sss);
2519
2520 time(&startSeconds);
2521 do_output_2(2, "compute_grid start time: %s", ctime(&startSeconds));
2522
2523 megaBytes = (real)(density->nbast * density->nbast * sizeof(real)) / 1000000;
2524 do_output_2(2, "nbast = %i, dmat uses %.1f Mb", density->nbast, megaBytes);
2525
2526 /* set up starting box */
2527 if(get_distribution_box(&startBox,
2528 &density->distrList[0],
2529 targetRhoError) != 0)
2530 {
2531 do_output_2(0, "error in get_distribution_box");
2532 return -1;
2533 }
2534 for(i = 1; i < noOfDistributions; i++)
2535 {
2536 if(get_distribution_box(&tempBox,
2537 &density->distrList[i],
2538 targetRhoError) != 0)
2539 {
2540 do_output_2(0, "error in get_distribution_box");
2541 return -1;
2542 }
2543 for(j = 0; j < NO_OF_DIMENSIONS; j++)
2544 {
2545 if(tempBox.min[j] < startBox.min[j])
2546 startBox.min[j] = tempBox.min[j];
2547 if(tempBox.max[j] > startBox.max[j])
2548 startBox.max[j] = tempBox.max[j];
2549 } /* END FOR j */
2550 } /* END FOR i */
2551
2552 do_output_2(2, "compute_grid starting box:");
2553 print_box(&startBox, 2);
2554
2555 Iexact = 0;
2556 for(i = 0; i < noOfDistributions; i++)
2557 Iexact += compute_integral_over_box(&density->distrList[i], &startBox);
2558 do_output_2(2, "Analytical integral over starting box: %.22f", Iexact);
2559 IexactInteger = round_real(Iexact);
2560 absRelError = fabs((double)IexactInteger - Iexact) / (double)IexactInteger;
2561 make_float_string(ss, absRelError);
2562 do_output_2(2, "Assuming that the correct value is %i, "
2563 "the relative error is %s", IexactInteger, ss);
2564
2565 do_output_2(2, "calling BuildRhoTree...");
2566 startTime = clock();
2567 rhoTreeRootNode = BuildRhoTree(noOfDistributions,
2568 density->distrList,
2569 NULL,
2570 targetRhoError);
2571 if(rhoTreeRootNode == NULL)
2572 {
2573 do_output_2(0, "error in BuildRhoTree\n");
2574 return -1;
2575 }
2576 timeTaken = clock() - startTime;
2577 do_output_2(2, "BuildRhoTree returned OK, time taken: %.2f s",
2578 ((double)timeTaken)/CLOCKS_PER_SEC);
2579
2580 do_output_2(2, "calling BuildRhoTree for shells...");
2581 startTime = clock();
2582 rhoTreeRootNodeShells = BuildRhoTree(noOfShells,
2583 NULL,
2584 density->shellList,
2585 targetRhoError);
2586 if(rhoTreeRootNodeShells == NULL)
2587 {
2588 do_output_2(0, "error in BuildRhoTree\n");
2589 return -1;
2590 }
2591 timeTaken = clock() - startTime;
2592 do_output_2(2, "BuildRhoTree for shells returned OK, time taken: %.2f s",
2593 ((double)timeTaken)/CLOCKS_PER_SEC);
2594
2595 /* compute Nx Ny Nz */
2596 for(i = 0; i < 3; i++)
2597 Nxyz[i] = 1 + (integer)((startBox.max[i] - startBox.min[i]) / boxdist);
2598 Nx = Nxyz[0];
2599 Ny = Nxyz[1];
2600 Nz = Nxyz[2];
2601 do_output_2(2, "boxdist = %f, Nx = %i, Ny = %i, Nz = %i, Ntot = %i",
2602 boxdist, Nx, Ny, Nz, Nx*Ny*Nz);
2603
2604 /* create grid file */
2605 gridFile = fopen(gridFileName, "wb");
2606 if(gridFile == NULL)
2607 {
2608 do_output_2(0, "error opening grid file '%s' for writing", gridFileName);
2609 return -1;
2610 }
2611
2612 writeResultsToFile = 1;
2613
2614
2615 /* up to this point there is no parallellization */
2616 /* this is where we start to think about threading */
2617
2618 #ifdef USE_PTHREADS
2619 pthread_mutex_t fileMutex = PTHREAD_MUTEX_INITIALIZER;
2620 pthread_mutex_t jobMutex = PTHREAD_MUTEX_INITIALIZER;
2621 #endif
2622
2623 threadParamsList = dal_malloc_safe(noOfThreads *
2624 sizeof(compute_grid_thread_func_struct));
2625
2626 currJobNumber = 1;
2627
2628 for(i = 0; i < noOfThreads; i++)
2629 {
2630 threadParamsList[i].density = density;
2631 threadParamsList[i].rhoTreeRootNode = rhoTreeRootNode;
2632 threadParamsList[i].rhoTreeRootNodeShells = rhoTreeRootNodeShells;
2633 threadParamsList[i].maxerror = maxerror;
2634 threadParamsList[i].gridFile = gridFile;
2635 threadParamsList[i].startBox = &startBox;
2636 threadParamsList[i].Nx = Nx;
2637 threadParamsList[i].Ny = Ny;
2638 threadParamsList[i].Nz = Nz;
2639 #ifdef USE_PTHREADS
2640 threadParamsList[i].fileMutex = &fileMutex;
2641 threadParamsList[i].jobMutex = &jobMutex;
2642 #endif
2643 threadParamsList[i].currJobNumber = &currJobNumber;
2644 threadParamsList[i].noOfPoints = -1;
2645 threadParamsList[i].noOfWrittenBatches = 0;
2646 threadParamsList[i].integralResult = 0;
2647 threadParamsList[i].threadNo = i;
2648 } /* END FOR i */
2649
2650
2651 #ifndef USE_PTHREADS
2652 do_output_2(2, "USE_PTHREADS not set, no threads created");
2653 if(noOfThreads != 1)
2654 {
2655 do_output_2(0, "error: cannot skip threads when (noOfThreads != 1)");
2656 return -1;
2657 }
2658 compute_grid_thread_func(&threadParamsList[0]);
2659 #else
2660 do_output_2(2, "Starting %i threads.", noOfThreads);
2661
2662 /* start threads */
2663 for(i = 0; i < noOfThreads; i++)
2664 {
2665 if(pthread_create(&threadParamsList[i].thread,
2666 NULL,
2667 compute_grid_thread_func,
2668 &threadParamsList[i]) != 0)
2669 {
2670 do_output_2(0, "Error in pthread_create for thread %i", i);
2671 do_output_2(0, "waiting for already created threads..");
2672 for(j = 0; j < i; j++)
2673 {
2674 if(pthread_join(threadParamsList[j].thread, NULL) != 0)
2675 {
2676 do_output_2(0, "Error in pthread_join for thread %i", j);
2677 }
2678 } /* END FOR j */
2679 do_output_2(0, "all threads finished, returning error code");
2680 return -1;
2681 }
2682 } /* END FOR i */
2683
2684 do_output_2(2, "%i threads started OK.", noOfThreads);
2685
2686 /* wait for threads to finish */
2687 for(i = 0; i < noOfThreads; i++)
2688 {
2689 if(pthread_join(threadParamsList[i].thread, NULL) != 0)
2690 {
2691 do_output_2(0, "Error in pthread_join for thread %i", i);
2692 }
2693 } /* END FOR i */
2694
2695 do_output_2(2, "all %i threads have finished:", noOfThreads);
2696 for(i = 0; i < noOfThreads; i++)
2697 do_output_2(2, "thread %2i noOfWrittenBatches = %6i",
2698 i, threadParamsList[i].noOfWrittenBatches);
2699 #endif
2700
2701 /* now all threads have finished, check for errors */
2702 for(i = 0; i < noOfThreads; i++)
2703 {
2704 if(threadParamsList[i].noOfPoints < 0)
2705 {
2706 do_output_2(0, "error in compute_grid_thread_func"
2707 " for thread %i\n", i);
2708 return -1;
2709 }
2710 } /* END FOR i */
2711
2712
2713
2714 noOfGridPoints = 0;
2715 noOfWrittenBatches = 0;
2716 totalIntegralResult = 0;
2717 for(i = 0; i < noOfThreads; i++)
2718 {
2719 noOfGridPoints += threadParamsList[i].noOfPoints;
2720 noOfWrittenBatches += threadParamsList[i].noOfWrittenBatches;
2721 totalIntegralResult += threadParamsList[i].integralResult;
2722 } /* END FOR i */
2723
2724 fclose(gridFile);
2725
2726 do_output_2(2, "noOfWrittenBatches = %i", noOfWrittenBatches);
2727
2728 time(&endSeconds);
2729 do_output_2(1, "compute_grid ending OK, noOfGridPoints = %i, took %i s",
2730 noOfGridPoints, (integer)(endSeconds - startSeconds));
2731 do_output_2(2, "compute_grid totalIntegralResult = %.11f",
2732 totalIntegralResult);
2733 correctValueInt = round_real(totalIntegralResult);
2734 correctValue = correctValueInt;
2735 abserrorRel = fabs(totalIntegralResult - correctValue) / correctValue;
2736 make_float_string(ss, abserrorRel);
2737 do_output_2(2, "Assuming that the correct value is %i,\n"
2738 "the absolute relative error is %s", correctValueInt, ss);
2739
2740 if(doTestIntegration == 1)
2741 {
2742 if(do_test_integration(density, gridFileName) != 0)
2743 {
2744 do_output_2(0, "error in do_test_integration");
2745 return -1;
2746 }
2747 } /* END IF */
2748
2749
2750 free_rho_tree_memory(rhoTreeRootNode);
2751 free_rho_tree_memory(rhoTreeRootNodeShells);
2752
2753 dal_free(threadParamsList);
2754
2755 time(&endSeconds);
2756 do_output_2(2, "compute_grid finish time: %s", ctime(&endSeconds));
2757
2758 return noOfGridPoints;
2759 } /* END compute_grid */
2760
2761
2762 static integer
do_merge_sort_distrs(integer n,DistributionSpecStruct * list,DistributionSpecStruct * workList)2763 do_merge_sort_distrs(integer n,
2764 DistributionSpecStruct* list,
2765 DistributionSpecStruct* workList)
2766 {
2767 /* merge sort: */
2768 /* first sort the first half, */
2769 /* then sort the second half, */
2770 /* then merge results to form final sorted list. */
2771 integer n1, n2, nn, decision, i1, i2, i;
2772 DistributionSpecStruct* d1;
2773 DistributionSpecStruct* d2;
2774
2775 if(n < 1)
2776 {
2777 do_output_2(0, "(n < 1)");
2778 return -1;
2779 }
2780 if(n == 1)
2781 return 0;
2782
2783 n1 = n / 2;
2784 n2 = n - n1;
2785
2786 /* sort first half */
2787 if(do_merge_sort_distrs(n1, list, workList) != 0)
2788 return -1;
2789
2790 /* sort second half */
2791 if(do_merge_sort_distrs(n2, &list[n1], workList) != 0)
2792 return -1;
2793
2794 /* merge results */
2795 nn = 0;
2796 i1 = 0;
2797 i2 = 0;
2798 while(nn < n)
2799 {
2800 if((i1 < n1) && (i2 < n2))
2801 {
2802 /* compare */
2803 d1 = &list[i1];
2804 d2 = &list[n1+i2];
2805 decision = 0;
2806 for(i = 0; i < 3; i++)
2807 {
2808 if(decision == 0)
2809 {
2810 if(d1->monomialInts[i] != d2->monomialInts[i])
2811 {
2812 if(d1->monomialInts[i] > d2->monomialInts[i])
2813 decision = 1;
2814 else
2815 decision = 2;
2816 }
2817 } /* END IF (decision == 0) */
2818 } /* END FOR i */
2819 if(decision == 0)
2820 {
2821 /* check exponents */
2822 if(d1->exponent > d2->exponent)
2823 decision = 1;
2824 else
2825 decision = 2;
2826 }
2827 }
2828 else
2829 {
2830 if(i1 == n1)
2831 decision = 2;
2832 else
2833 decision = 1;
2834 }
2835 if(decision <= 0)
2836 {
2837 do_output_2(0, "(decision <= 0)");
2838 return -1;
2839 }
2840 if(decision == 1)
2841 {
2842 memcpy(&workList[nn], &list[i1], sizeof(DistributionSpecStruct));
2843 i1++;
2844 }
2845 else
2846 {
2847 memcpy(&workList[nn], &list[n1+i2], sizeof(DistributionSpecStruct));
2848 i2++;
2849 }
2850 nn++;
2851 } /* END WHILE (nn < n) */
2852 if(i1 != n1)
2853 {
2854 do_output_2(0, "(i1 != n1)");
2855 return -1;
2856 }
2857 if(i2 != n2)
2858 {
2859 do_output_2(0, "(i2 != n2)");
2860 return -1;
2861 }
2862 if(nn != n)
2863 {
2864 do_output_2(0, "(nn != n)");
2865 return -1;
2866 }
2867 memcpy(list, workList, n * sizeof(DistributionSpecStruct));
2868 return 0;
2869 } /* END do_merge_sort_distrs */
2870
2871
2872
2873 static integer
compute_extent_for_shells(BasisInfoStruct * basisInfo,real targetRhoError)2874 compute_extent_for_shells(BasisInfoStruct* basisInfo, real targetRhoError)
2875 {
2876 integer i;
2877 for(i = 0; i < basisInfo->noOfShells; i++)
2878 {
2879 ShellSpecStruct* currShell = &basisInfo->shellList[i];
2880 integer contr = currShell->noOfContr;
2881 integer worstIndex = -1;
2882 real largestExtent = 0;
2883 integer kk;
2884 for(kk = 0; kk < contr; kk++)
2885 {
2886 DistributionSpecStruct testDistr;
2887 BoxStruct testBox;
2888 integer j;
2889 real currExtent;
2890
2891 testDistr.coeff = currShell->coeffList[kk];
2892 testDistr.exponent = currShell->exponentList[kk];
2893 for(j = 0; j < 3; j++)
2894 testDistr.centerCoords[j] = 0;
2895 for(j = 0; j < 3; j++)
2896 testDistr.monomialInts[j] = 0;
2897 get_distribution_box(&testBox, &testDistr, targetRhoError);
2898 currExtent = (testBox.max[0] - testBox.min[0]) / 2;
2899 if(currExtent > largestExtent)
2900 {
2901 largestExtent = currExtent;
2902 worstIndex = kk;
2903 }
2904 } /* END FOR kk */
2905 if(worstIndex < 0)
2906 return -1;
2907 currShell->extent = largestExtent;
2908 } /* END FOR i */
2909 return 0;
2910 }
2911
2912
2913
2914
2915
2916 static integer
get_no_of_primitives_for_density(real cutoff,const real * dmat,BasisInfoStruct * basisInfo)2917 get_no_of_primitives_for_density(real cutoff,
2918 const real *dmat,
2919 BasisInfoStruct* basisInfo)
2920 {
2921 #define MAX_DISTR_IN_TEMP_LIST 888
2922
2923 integer i, j;
2924 integer symmetryFactor;
2925 integer nBasisFuncs, nn;
2926
2927 do_output_2(2, "entering function get_no_of_primitives_for_density, cutoff = %22.15f", cutoff);
2928
2929 nBasisFuncs = basisInfo->noOfBasisFuncs;
2930 nn = 0;
2931 for(i = 0; i < nBasisFuncs; i++)
2932 {
2933 for(j = 0; j < nBasisFuncs; j++)
2934 {
2935 DistributionSpecStruct tempList[MAX_DISTR_IN_TEMP_LIST];
2936 integer nPrimitives, k;
2937 /* the matrix M is symmetric: include diagonal terms once, */
2938 /* and include upper off-diagonal terms multiplied by 2 */
2939 if(i == j)
2940 symmetryFactor = 1;
2941 else
2942 symmetryFactor = 2;
2943 if(i > j)
2944 continue;
2945 nPrimitives =
2946 get_product_simple_primitives(basisInfo, i,
2947 basisInfo, j,
2948 tempList,
2949 MAX_DISTR_IN_TEMP_LIST);
2950 do_output_2(3, "get_product_simple_primitives returned %i",
2951 nPrimitives);
2952 if(nPrimitives <= 0)
2953 {
2954 do_output_2(0, "error in get_product_simple_primitives");
2955 return -1;
2956 }
2957 for(k = 0; k < nPrimitives; k++)
2958 {
2959 DistributionSpecStruct* currDistr = &tempList[k];
2960 real Mij = dmat[i*nBasisFuncs+j];
2961 real newCoeff = currDistr->coeff * Mij * symmetryFactor;
2962 if(fabs(newCoeff) > cutoff)
2963 nn++;
2964 }
2965 }
2966 }
2967 return nn;
2968 }
2969
2970
2971
2972
2973 static integer
get_density(DistributionSpecStruct ** rhoPtr,integer maxCountShellList,integer * noOfShellsReturn,real cutoffInp,real targetRhoError,integer nbast,const real * dmat,ShellSpecStruct * shellList,BasisFuncStruct * basisFuncList)2974 get_density(DistributionSpecStruct** rhoPtr,
2975 integer maxCountShellList,
2976 integer* noOfShellsReturn,
2977 real cutoffInp,
2978 real targetRhoError,
2979 integer nbast,
2980 const real *dmat,
2981 ShellSpecStruct* shellList,
2982 BasisFuncStruct* basisFuncList)
2983 {
2984 #define MAX_DISTR_IN_TEMP_LIST 888
2985 real cutoff = cutoffInp;
2986
2987 /*char s[888]; */
2988 integer i, j, k, kk;
2989 DistributionSpecStruct* workList;
2990 DistributionSpecStruct* rhoSaved;
2991 real absvalue;
2992 real absdiff;
2993 real sqrtValue;
2994 integer sameYesNo, firstIndex, count, withinLimit, resultCount;
2995 real coeffSum;
2996 integer* markList;
2997 integer symmetryFactor;
2998 integer nBasisFuncs, nn, nNeededForRho;
2999 BasisInfoStruct basisInfo;
3000 DistributionSpecStruct* rho;
3001
3002 do_output_2(2, "entering function get_density, cutoff = %22.15f", cutoff);
3003
3004 if(get_shells(&basisInfo) != 0)
3005 {
3006 do_output_2(0, "error in get_shells");
3007 return -1;
3008 }
3009 do_output_2(2, "get_shells returned OK, number of shells: %i",
3010 basisInfo.noOfShells);
3011 if(compute_extent_for_shells(&basisInfo, targetRhoError) != 0)
3012 {
3013 do_output_2(0, "error in compute_extent_for_shells");
3014 return -1;
3015 }
3016 do_output_2(2, "compute_extent_for_shells returned OK");
3017 if(get_basis_funcs(&basisInfo) != 0)
3018 {
3019 do_output_2(0, "error in get_basis_funcs");
3020 return -1;
3021 }
3022 do_output_2(2, "get_basis_funcs returned OK, number of basis funcs: %i",
3023 basisInfo.noOfBasisFuncs);
3024 if(get_simple_primitives_all(&basisInfo) != 0)
3025 {
3026 do_output_2(0, "error in get_simple_primitives_all");
3027 return -1;
3028 }
3029 do_output_2(2, "get_simple_primitives_all returned OK, n = %i",
3030 basisInfo.noOfSimplePrimitives);
3031
3032
3033 /* find out how much space is needed for rho */
3034 nNeededForRho = get_no_of_primitives_for_density(cutoff,
3035 dmat,
3036 &basisInfo);
3037 if(nNeededForRho <= 0)
3038 {
3039 do_output_2(0, "error in get_no_of_primitives_for_density");
3040 return -1;
3041 }
3042
3043 /* allocate rho */
3044 rho = dal_malloc_safe(nNeededForRho * sizeof(DistributionSpecStruct));
3045 *rhoPtr = rho;
3046
3047 nBasisFuncs = basisInfo.noOfBasisFuncs;
3048 nn = 0;
3049 for(i = 0; i < nBasisFuncs; i++)
3050 {
3051 for(j = 0; j < nBasisFuncs; j++)
3052 {
3053 DistributionSpecStruct tempList[MAX_DISTR_IN_TEMP_LIST];
3054 integer nPrimitives, k;
3055 /*printf("i = %i, j = %i\n", i, j); */
3056 /* the matrix M is symmetric: include diagonal terms once, */
3057 /* and include upper off-diagonal terms multiplied by 2 */
3058 if(i == j)
3059 symmetryFactor = 1;
3060 else
3061 symmetryFactor = 2;
3062 if(i > j)
3063 continue;
3064 /*printf("calling get_product_simple_primitives\n"); */
3065 nPrimitives =
3066 get_product_simple_primitives(&basisInfo, i,
3067 &basisInfo, j,
3068 tempList,
3069 MAX_DISTR_IN_TEMP_LIST);
3070 do_output_2(3, "get_product_simple_primitives returned %i",
3071 nPrimitives);
3072 if(nPrimitives <= 0)
3073 {
3074 do_output_2(0, "error in get_product_simple_primitives");
3075 return -1;
3076 }
3077 for(k = 0; k < nPrimitives; k++)
3078 {
3079 DistributionSpecStruct* currDistr = &tempList[k];
3080 real Mij = dmat[i*nBasisFuncs+j];
3081 /*printf("symmetryFactor = %i\n", symmetryFactor); */
3082 real newCoeff = currDistr->coeff * Mij * symmetryFactor;
3083 do_output_2(4, "Mij = %33.22f", Mij);
3084 do_output_2(4, "currDistr->coeff = %33.22f", currDistr->coeff);
3085 do_output_2(4, "newCoeff = %33.22f", newCoeff);
3086 if(fabs(newCoeff) > cutoff)
3087 {
3088 /* add to final list */
3089 if(nn > nNeededForRho)
3090 {
3091 do_output_2(0, "error: (nn > nNeededForRho)");
3092 return -1;
3093 }
3094 memcpy(&rho[nn], currDistr,
3095 sizeof(DistributionSpecStruct));
3096 rho[nn].coeff = newCoeff;
3097 nn++;
3098 }
3099 }
3100 }
3101 }
3102
3103 *noOfShellsReturn = basisInfo.noOfShells;
3104
3105 memcpy(shellList, basisInfo.shellList,
3106 basisInfo.noOfShells * sizeof(ShellSpecStruct));
3107
3108 memcpy(basisFuncList, basisInfo.basisFuncList,
3109 basisInfo.noOfShells * sizeof(BasisFuncStruct));
3110
3111
3112 do_output_2(2, "loop ended OK; list 'rho' created, nn = %i", nn);
3113
3114 /* Now all distributions are stored in the list 'rho'. */
3115 /* The number of entries in the list is nn. */
3116 /* It could happen that all entries are not unique. */
3117 /* We want to join distributions that have the same center */
3118 /* and the same exponent. */
3119 /* To do this, start with sorting the list by nx, ny, nz, exponent. */
3120 workList = dal_malloc_safe(nn * sizeof(DistributionSpecStruct));
3121 rhoSaved = dal_malloc_safe(nn * sizeof(DistributionSpecStruct));
3122 memcpy(rhoSaved, rho, nn * sizeof(DistributionSpecStruct));
3123
3124 do_output_2(2, "calling do_merge_sort_distrs, nn = %i", nn);
3125 if(do_merge_sort_distrs(nn, rho, workList) != 0)
3126 {
3127 do_output_2(0, "error in do_merge_sort_distrs");
3128 return -1;
3129 }
3130 do_output_2(2, "do_merge_sort_distrs returned OK");
3131
3132
3133 do_output_2(2, "checking sort..");
3134 /* check that list is sorted */
3135 for(i = 0; i < (nn-1); i++)
3136 {
3137 if(rho[i].exponent < rho[i+1].exponent)
3138 {
3139 sameYesNo = 1;
3140 for(j = 0; j < 3; j++)
3141 {
3142 if(rho[i].monomialInts[j] != rho[i+1].monomialInts[j])
3143 sameYesNo = 0;
3144 } /* END FOR j */
3145 if(sameYesNo == 1)
3146 {
3147 printf("error: distr list NOT properly sorted\n");
3148 return -1;
3149 }
3150 }
3151 } /* END FOR i */
3152 do_output_2(2, "sort checked OK");
3153
3154
3155 markList = dal_malloc_safe(nn * sizeof(integer));
3156 for(i = 0; i < nn; i++)
3157 markList[i] = 0;
3158
3159 /* now go through sorted list, joining distributions where possible */
3160 i = 0;
3161 count = 0;
3162 firstIndex = 0;
3163 while(i < nn)
3164 {
3165 /* check if this entry has the same nx ny nz as current 'firstIndex' */
3166 sameYesNo = 1;
3167 for(j = 0; j < 3; j++)
3168 {
3169 if(rho[i].monomialInts[j] != rho[firstIndex].monomialInts[j])
3170 sameYesNo = 0;
3171 } /* END FOR j */
3172 /* check exponent */
3173 absdiff = fabs(rho[i].exponent - rho[firstIndex].exponent);
3174 if(absdiff > EXPONENT_DIFF_LIMIT)
3175 sameYesNo = 0;
3176 if(sameYesNo == 0)
3177 {
3178 for(j = firstIndex; j < i; j++)
3179 {
3180 if(markList[j] == 0)
3181 {
3182 markList[j] = 1;
3183 /* join distrs that have centers within */
3184 /* DISTR_CENTER_DIST_LIMIT of this one */
3185 coeffSum = rho[j].coeff;
3186 for(k = j+1; k < i; k++)
3187 {
3188 withinLimit = 1;
3189 for(kk = 0; kk < 3; kk++)
3190 {
3191 absdiff = fabs(rho[j].centerCoords[kk] -
3192 rho[k].centerCoords[kk]);
3193 if(absdiff > DISTR_CENTER_DIST_LIMIT)
3194 withinLimit = 0;
3195 } /* END FOR kk */
3196 if(withinLimit == 1)
3197 {
3198 coeffSum += rho[k].coeff;
3199 markList[k] = 1;
3200 /*printf("found, k = %i\n", k); */
3201 }
3202 } /* END FOR k */
3203 memcpy(&workList[count],
3204 &rho[j],
3205 sizeof(DistributionSpecStruct));
3206 workList[count].coeff = coeffSum;
3207 count++;
3208 } /* END IF (markList[j] == 0) */
3209 } /* END FOR j */
3210 /*printf("setting firstIndex = %i\n", i); */
3211 firstIndex = i;
3212 }
3213 else
3214 {
3215 /*printf("sameYesNo = 1\n"); */
3216 }
3217 i++;
3218 } /* END WHILE (i < nn) */
3219 /* take care of last part */
3220 for(j = firstIndex; j < nn; j++)
3221 {
3222 if(markList[j] == 0)
3223 {
3224 markList[j] = 1;
3225 /* join distrs that have centers within */
3226 /* DISTR_CENTER_DIST_LIMIT of this one */
3227 coeffSum = rho[j].coeff;
3228 for(k = j+1; k < nn; k++)
3229 {
3230 withinLimit = 1;
3231 for(kk = 0; kk < 3; kk++)
3232 {
3233 absdiff = fabs(rho[j].centerCoords[kk] -
3234 rho[k].centerCoords[kk]);
3235 if(absdiff > DISTR_CENTER_DIST_LIMIT)
3236 withinLimit = 0;
3237 } /* END FOR kk */
3238 if(withinLimit == 1)
3239 {
3240 coeffSum += rho[k].coeff;
3241 markList[k] = 1;
3242 }
3243 } /* END FOR k */
3244 memcpy(&workList[count], &rho[j], sizeof(DistributionSpecStruct));
3245 workList[count].coeff = coeffSum;
3246 count++;
3247 } /* END IF (markList[j] == 0) */
3248 } /* END FOR j */
3249
3250 for(j = 0; j < nn; j++)
3251 {
3252 if(markList[j] != 1)
3253 {
3254 printf("error: (markList[%i] != 1)\n", j);
3255 return -1;
3256 }
3257 } /* END FOR j */
3258
3259
3260 /* now move results back to list 'rho', */
3261 /* skipping those that have too small coeff */
3262 resultCount = 0;
3263 for(i = 0; i < count; i++)
3264 {
3265 sqrtValue = sqrt(CONSTPI / workList[i].exponent);
3266 absvalue = workList[i].coeff * sqrtValue * sqrtValue * sqrtValue;
3267 if(absvalue < 0) absvalue *= -1;
3268 if(absvalue > cutoff)
3269 {
3270 memcpy(&rho[resultCount],
3271 &workList[i],
3272 sizeof(DistributionSpecStruct));
3273 resultCount++;
3274 }
3275 } /* END FOR i */
3276 /*memcpy(rho, workList, count * sizeof(DistributionSpecStruct)); */
3277
3278 do_output_2(2, "nn = %9i", nn);
3279 do_output_2(2, "count = %9i", count);
3280 do_output_2(2, "resultCount = %9i", resultCount);
3281
3282 /*dal_free(list); */
3283 /*dal_free(indexList); */
3284 dal_free(workList);
3285 dal_free(markList);
3286 dal_free(rhoSaved);
3287
3288 return resultCount;
3289 } /* END read_density_file */
3290
3291
3292
3293 void
do_cartesian_grid(integer nbast,const real * dmat,DftGridReader * res)3294 do_cartesian_grid(integer nbast, const real* dmat, DftGridReader* res)
3295 {
3296 DistributionSpecStruct* rho;
3297 ShellSpecStruct* shellList;
3298 BasisFuncStruct* basisFuncList;
3299 integer noOfShells, nGridPoints, noOfDistributions;
3300 DensitySpecStruct density;
3301
3302 /* check if new grid must be created */
3303 if((global_gridCount == 0) ||
3304 ((global_gridCount+1) % global_nFreeze) == 0)
3305 {
3306
3307 /* check dmat */
3308 real maxabs = 0;
3309 integer i;
3310 for(i = 0; i < nbast*nbast; i++)
3311 {
3312 real temp = fabs(dmat[i]);
3313 if(temp > maxabs)
3314 maxabs = temp;
3315 }
3316 do_output_2(2, "do_cartesian_grid checking dmat: maxabs = %22.15f",
3317 maxabs);
3318
3319 /* allocate memory */
3320 /*printf("allocating memory..\n"); */
3321 shellList =
3322 dal_malloc(global_maxNoOfShells *
3323 sizeof(ShellSpecStruct));
3324 basisFuncList =
3325 dal_malloc(nbast * sizeof(BasisFuncStruct));
3326
3327 noOfDistributions =
3328 get_density(&rho,
3329 global_maxNoOfShells,
3330 &noOfShells,
3331 global_distrCutoff,
3332 global_targetRhoError,
3333 nbast,
3334 dmat,
3335 shellList,
3336 basisFuncList);
3337 if(noOfDistributions <= 0)
3338 {
3339 fprintf(stderr, "error in read_density_file\n");
3340 fort_print("error in read_density_file");
3341 abort();
3342 return;
3343 }
3344
3345 density.noOfShells = noOfShells;
3346 density.shellList = shellList;
3347 density.nbast = nbast;
3348 density.dmat = dmat;
3349 density.basisFuncList = basisFuncList;
3350 density.noOfDistributions = noOfDistributions;
3351 density.distrList = rho;
3352
3353 /* get grid */
3354 nGridPoints = compute_grid(&density,
3355 global_maxerror,
3356 global_boxdist,
3357 global_targetRhoError,
3358 "DALTON.cQUAD",
3359 global_nThreads,
3360 global_doTestIntegration);
3361 if(nGridPoints <= 0)
3362 {
3363 fprintf(stderr, "error in compute_grid");
3364 fort_print("error in compute_grid");
3365 abort();
3366 return;
3367 }
3368
3369 /* free memory */
3370 free(shellList);
3371 free(rho);
3372 free(basisFuncList);
3373
3374 } /* END IF create new grid */
3375
3376 global_gridCount++;
3377 return;
3378 }
3379
3380
3381 int output_energy_counter = 0;
3382 FILE* energy_file = NULL;
3383
3384 void
output_energy_(double * energyPtr)3385 output_energy_(double* energyPtr)
3386 {
3387 double energy = *energyPtr;
3388 char s[888];
3389
3390 output_energy_counter++;
3391 sprintf(s, "Energy %2i: %20.12f", output_energy_counter, energy);
3392 do_output_2(1, s);
3393 if(energy_file == NULL)
3394 {
3395 energy_file = fopen("energy_file.txt", "wt");
3396 }
3397 fprintf(energy_file, "%s\n", s);
3398 fflush(energy_file);
3399 }
3400
3401