1 /*
2 
3 
4 !
5 !  Dalton, a molecular electronic structure program
6 !  Copyright (C) by the authors of Dalton.
7 !
8 !  This program is free software; you can redistribute it and/or
9 !  modify it under the terms of the GNU Lesser General Public
10 !  License version 2.1 as published by the Free Software Foundation.
11 !
12 !  This program is distributed in the hope that it will be useful,
13 !  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 !  Lesser General Public License for more details.
16 !
17 !  If a copy of the GNU LGPL v2.1 was not distributed with this
18 !  code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html.
19 !
20 
21 !
22 
23 */
24 /* The multi-scale cartesian cubature grid generator. The original
25  * implementation described in M. Challacombe, JCP 113(22),
26  * p.10037. This one is modified wrt to the paper.
27  *
28  *  Elias Rudberg, 2004-04
29 */
30 
31 #define __CVERSION__
32 #if !defined(SYS_DEC)
33 /* XOPEN compliance is missing on old Tru64 4.0E Alphas and pow() prototype
34  * is not specified. */
35 #define _XOPEN_SOURCE          500
36 #define _XOPEN_SOURCE_EXTENDED 1
37 #endif
38 
39 #include <stdlib.h>
40 #include <math.h>
41 #include <stdio.h>
42 #include <errno.h>
43 #include <memory.h>
44 #include <time.h>
45 #include <stdarg.h>
46 #include "general.h"
47 #include "grid-gen.h"
48 #include "basisinfo.h"
49 
50 /* #define USE_PTHREADS */
51 #ifdef USE_PTHREADS
52 #include <pthread.h>
53 #endif
54 
55 static const integer CUBATURE_RULE = 3;
56 static const integer CUBATURE_RULE_2 = 6;
57 static const integer NO_OF_DIMENSIONS = 3;  /* 1, 2 or 3 */
58 static const real CONSTPI = M_PI;
59 
60 
61 /* Whether hierarchical cubature should be used or not. Modified by
62  * a keyword in the input file. */
63 int global_useHiCu = 0;
64 
65 static integer    global_maxNoOfShells     = 44444;
66 
67 /* This variable is used to keep track of when it is time to
68    create a new grid, together with parameter global_nFreeze. */
69 static integer    global_gridCount = 0;
70 
71 
72 /* ---------- CONFIGURABLE PARAMETERS ------------------------------
73    ---------- CONFIGURABLE PARAMETERS ------------------------------
74    ---------- CONFIGURABLE PARAMETERS ------------------------------ */
75 /* number of threads */
76 static integer    global_nThreads          = 1;
77 
78 /* flag for test integration. If turned on, the grid file is
79 reopened after it has been created, and the density is integrated
80 using the newly created grid. Just to check that it gives the
81 same result as reported by the grid generation and by
82 the dft integrator. */
83 static integer    global_doTestIntegration = 0;
84 
85 /* Output level. 0 means minimum output, 1 means little output,
86    2 means a lot of output. */
87 static integer    global_outputLevel       = 1;
88 
89 /* Number of iterations to use the same grid, before creating
90 a new one. nFreeze=1 gives a new grid for each iteration.
91 nFreeze=1000 means that the first grid is used throughout
92 the whole calculation. */
93 static integer    global_nFreeze           = 1000;
94 
95 /* Threshold value for distributions. A gaussian is ignored in areas
96 where its value is below this threshold. A low value is
97 computationally expensive. */
98 static real   global_targetRhoError = 1.0e-10;
99 
100 /* Cutoff value used to decide which gaussian products should
101 be ignored. A product with a lower coefficient than this
102 will be thrown away. */
103 static real   global_distrCutoff    = 1.0e-11;
104 
105 /* 3d space is partitioned into boxes of this size. */
106 static real   global_boxdist        = 1.5;
107 
108 /* Main threshold error value for grid generation. The difference of
109 analytical and numerical integrals is forced below this value.
110 This is the most important parameter, and probably the only one
111 that a typical user should worry about. */
112 static real   global_maxerror       = 1.0e-7;
113 
114 
115 
116 
117 
118 
119 #define USE_EXP_STD
120 #define USE_ERF_STD
121 #define DO_EXTRA_ERROR_CHECKING
122 #define FILE_BATCH_N 200000
123 #define MAX_NO_OF_POINTS_PER_BATCH 100
124 #define MAX_NO_OF_SHLBLOCKS 44444
125 #define EXPONENT_DIFF_LIMIT 1e-22
126 #define DISTR_CENTER_DIST_LIMIT 1e-22
127 #define N_BATCH_JOBS 22
128 #define MAX_NO_OF_POINTS_PER_WRITE 50000
129 
130 #if !defined(INSTALL_WRKMEM)
131 #define USE_TEST_VERSION
132 #endif
133 
134 /*#define USE_TEST_VERSION */
135 
136 /*#define SKIP_THREADS */
137 
138 
139 #ifdef USE_PTHREADS
140 pthread_mutex_t globalOutputMutex = PTHREAD_MUTEX_INITIALIZER;
141 #endif
142 
143 
144 /*//////////////////////////////////////////////////////////////////////// */
145 /*/////////////////  typedef section  //////////////////////////////////// */
146 /*//////////////////////////////////////////////////////////////////////// */
147 
148 #if 0
149 struct DistributionSpecContr_{
150   integer noOfContr;
151   real coeffList[MAX_NO_OF_CONTR_GAUSSIANS];
152   real exponentList[MAX_NO_OF_CONTR_GAUSSIANS];
153   real centerCoords[3]; /* x0, y0, z0 */
154   integer monomialInts[3];  /* nx, ny, nz */
155 };
156 typedef struct DistributionSpecContr_ DistributionSpecContr;
157 #endif
158 
159 typedef struct
160 {
161   integer noOfShells;
162   ShellSpecStruct* shellList;
163   integer nbast;
164   const real* dmat;
165   BasisFuncStruct* basisFuncList;
166   integer noOfDistributions;
167   DistributionSpecStruct* distrList;
168 } DensitySpecStruct;
169 
170 
171 
172 struct BoxStruct_{
173     real min[3]; /* xmin, ymin, zmin */
174     real max[3]; /* xmax, ymax, zmax */
175 };
176 typedef struct BoxStruct_ BoxStruct;
177 
178 struct rhoTreeNode_{
179   BoxStruct box;
180     struct rhoTreeNode_* child1; /* NULL for leaf node */
181     struct rhoTreeNode_* child2; /* NULL for leaf node */
182     integer distrIndex;      /* -1 for non-leaf node */
183 };
184 typedef struct rhoTreeNode_ rhoTreeNode;
185 
186 typedef struct
187 {
188   DensitySpecStruct density;
189   integer noOfNonzeroDistributions;
190   integer* nonZeroDistrIndexList;
191   integer noOfNonzeroShells;
192   integer* nonZeroShellsIndexList;
193   real maxerrorPerBox;
194 } compute_grid_for_box_params_struct;
195 
196   typedef struct
197   {
198     DensitySpecStruct* density;
199     rhoTreeNode* rhoTreeRootNode;
200     rhoTreeNode* rhoTreeRootNodeShells;
201     real maxerror;
202     FILE* gridFile;
203     BoxStruct* startBox;
204     integer Nx;
205     integer Ny;
206     integer Nz;
207 #ifdef USE_PTHREADS
208     pthread_mutex_t* fileMutex;
209     pthread_mutex_t* jobMutex;
210     pthread_t thread;
211 #endif
212     integer* currJobNumber;
213       integer noOfPoints;         /* OUTPUT */
214       integer noOfWrittenBatches; /* OUTPUT */
215       real integralResult;    /* OUTPUT */
216     integer threadNo;
217   } compute_grid_thread_func_struct;
218 
219 
220 /*//////////////////////////////////////////////////////////////////////// */
221 /*/////////////////  end of typedef section  ///////////////////////////// */
222 /*//////////////////////////////////////////////////////////////////////// */
223 
224 
225 /* Solid harmonics based on the table 6.3 of Molecular
226  * Electronic-Structure Theory by Helgaker, Jørgensen and Olsen. */
227 
228 #define solid_harmonic_s_0(x, y, z, x2, y2, z2, r2) 1
229 
230 #define solid_harmonic_p_0(x, y, z, x2, y2, z2, r2) x
231 #define solid_harmonic_p_1(x, y, z, x2, y2, z2, r2) y
232 #define solid_harmonic_p_2(x, y, z, x2, y2, z2, r2) z
233 
234 #define solid_harmonic_d_0(x, y, z, x2, y2, z2, r2) (x * y)
235 #define solid_harmonic_d_1(x, y, z, x2, y2, z2, r2) (y * z)
236 #define solid_harmonic_d_2(x, y, z, x2, y2, z2, r2) ((2 * z2 - x2 - y2) / (2 * sqrt(3)))
237 #define solid_harmonic_d_3(x, y, z, x2, y2, z2, r2) (x * z)
238 #define solid_harmonic_d_4(x, y, z, x2, y2, z2, r2) (0.5 * (x2 - y2))
239 
240 #define solid_harmonic_f_0(x, y, z, x2, y2, z2, r2) ((0.5 * sqrt(2.5) * (3 * x2 - y2) * y) / sqrt(15))
241 #define solid_harmonic_f_1(x, y, z, x2, y2, z2, r2) (x * y * z)
242 #define solid_harmonic_f_2(x, y, z, x2, y2, z2, r2) (0.5 * sqrt(1.5) * (5 * z2 - r2) * y / sqrt(15))
243 #define solid_harmonic_f_3(x, y, z, x2, y2, z2, r2) (0.5 * (5 * z2 - 3 * r2) * z / sqrt(15))
244 #define solid_harmonic_f_4(x, y, z, x2, y2, z2, r2) (0.5 * sqrt(1.5) * (5 * z2 - r2) * x / sqrt(15))
245 #define solid_harmonic_f_5(x, y, z, x2, y2, z2, r2) (0.5 * (x2 - y2) * z)
246 #define solid_harmonic_f_6(x, y, z, x2, y2, z2, r2) (0.5 * sqrt(2.5) * (x2 - 3 * y2) * x / sqrt(15))
247 
248 
249 
250 void
do_output_2(integer prio,const char * format,...)251 do_output_2(integer prio, const char* format, ...)
252 {
253   char s[888];
254   va_list a;
255 
256   if(prio > global_outputLevel)
257     return;
258 
259 #ifdef USE_PTHREADS
260   pthread_mutex_lock(&globalOutputMutex);
261 #endif
262 
263   va_start(a, format);
264   vsnprintf(s, sizeof(s), format, a);
265   va_end(a);
266 
267 #if defined USE_TEST_VERSION
268   printf("USE_TEST_VERSION defined\n");
269   printf("%s\n", s);
270 #else
271   fort_print(s);
272 #endif
273 
274 #ifdef USE_PTHREADS
275   pthread_mutex_unlock(&globalOutputMutex);
276 #endif
277 }
278 
279 
280 
281 static void
do_error_exit(const char * s)282 do_error_exit(const char* s)
283 {
284   char ss[888];
285   sprintf(ss, "error_exit: %s\n", s);
286   printf("%s\n",ss);
287   do_output_2(0, ss);
288   fprintf(stderr,"%s\n", ss);
289   exit(1);
290 }
291 
292 void*
dal_malloc_safe_(size_t sz,const char * place,integer line)293 dal_malloc_safe_(size_t sz, const char *place, integer line)
294 {
295   void* res = malloc(sz);
296   if(!res) {
297     do_output_2(0, "error in dal_malloc_safe, '%s', sz = %i, line %i\n",
298 		place, sz, line);
299     do_error_exit("error in dal_malloc_safe_");
300   }
301   return res;
302 }
303 
304 void
dal_free(void * a)305 dal_free(void *a)
306 {
307   free(a);
308 }
309 
310 #define dal_malloc_safe(sz) dal_malloc_safe_((sz),__FUNCTION__, __LINE__)
311 
312 
313 
314 
315 
316 #if 0
317 void
318 dftgridparams_(char *line, integer len)
319 {
320     integer i = 0, tokenlen;
321     char *sp;
322     char* endPtr;
323     printf("dftgridparams_ len = %i\n", len);
324     printf("parsing '%s'\n", line);
325     endPtr = line + len;
326     for(i=0; i<len; i+=tokenlen) {
327         sp = strchr(line+i, ' ');
328         /*printf("parsing '%s'\n", line+i); */
329         if(sp)
330             tokenlen = sp-(line+i)+1;
331         else tokenlen=100000;
332         /*printf("tokenlen = %i\n", tokenlen); */
333         if(strncmp(line+i, "box=", 4) ==0) {
334             global_boxdist = atof(line+i+4);
335             printf("New box distance %f\n", global_boxdist);
336             continue;
337         }
338         if(strncmp(line+i, "maxerror=", 9) ==0) {
339             global_maxerror = atof(line+i+9);
340             printf("New maxerror %g\n", global_maxerror);
341             continue;
342         }
343         if(strncmp(line+i, "nthreads=", 9) ==0) {
344             global_nThreads = atoi(line+i+9);
345             printf("New global_nThreads %i\n", global_nThreads);
346             continue;
347         }
348         if(strncmp(line+i, "dotest=", 7) ==0) {
349             global_doTestIntegration = atoi(line+i+7);
350             printf("New global_doTestIntegration %i\n",
351                    global_doTestIntegration);
352             continue;
353         }
354         if(strncmp(line+i, "usehicu=", 8) ==0) {
355             global_useHiCu = atoi(line+i+8);
356             printf("New global_useHiCu %i\n",
357                    global_useHiCu);
358             continue;
359         }
360         if(strncmp(line+i, "nfreeze=", 8) ==0) {
361             global_nFreeze = atoi(line+i+8);
362             printf("New global_nFreeze %i\n",
363                    global_nFreeze);
364             continue;
365         }
366         if(strncmp(line+i, "output=", 7) ==0) {
367             global_outputLevel = atoi(line+i+7);
368             printf("New global_outputLevel %i\n", global_outputLevel);
369             continue;
370         }
371     } /* END FOR i           */
372 }
373 #endif
374 
375 
make_float_string(char * s,real x)376 static void make_float_string(char* s, real x)
377 {
378   real temp;
379   integer power;
380   power = 0;
381   temp = x;
382   while(fabs(temp) > 9.999)
383     {
384       temp /= 10;
385       power++;
386     }
387   while(fabs(temp) < 0.999)
388     {
389       temp *= 10;
390       power--;
391     }
392   sprintf(s, "%.1fe%i", (double)temp, power);
393 }
394 
395 
396 
397 
398 static integer
parseParam(char * s)399 parseParam(char* s)
400 {
401 /* use #define instead of more modern static const integer to please old
402  * compilers. */
403 #define MAX_BYTES 222
404   char* p = s;
405   char* endPtr = s + strlen(s);
406   char paramName[MAX_BYTES];
407   char paramValueString[MAX_BYTES];
408   /* look for = */
409   char* q = p;
410 
411   while((*q != '=') && (*q != 0))
412     q++;
413   if(*q != '=')
414     {
415       do_output_2(0, "error parsing string '%s': '=' not found", s);
416       return -1;
417     }
418   /* now q points to '=' */
419   memcpy(paramName, p, q-p);
420   paramName[q-p] = 0;
421   /*printf("paramName = '%s'\n", paramName); */
422   memcpy(paramValueString, q+1, endPtr-q-1);
423   paramValueString[endPtr-q-1] = 0;
424   /*printf("paramValueString = '%s'\n", paramValueString); */
425   if(strlen(paramName) == 0)
426     {
427       do_output_2(0, "error parsing string '%s': nothing found before '='", s);
428       return -1;
429     }
430   if(strlen(paramValueString) == 0)
431     {
432       do_output_2(0, "error parsing string '%s': nothing found after '='", s);
433       return -1;
434     }
435 
436   if(strcmp(paramName, "box") == 0)
437     {
438       real newBoxDist = atof(paramValueString);
439       if(newBoxDist <= 0)
440 	{
441 	  do_output_2(0, "error: grid param box = %f", newBoxDist);
442 	  return -1;
443 	}
444       global_boxdist = newBoxDist;
445       do_output_2(2, "grid parameter box      = %f", global_boxdist);
446       return 0;
447     }
448   if(strcmp(paramName, "maxerror") == 0)
449     {
450       real new_maxerror;
451       char ss[MAX_BYTES];
452       new_maxerror = atof(paramValueString);
453       if(new_maxerror <= 0)
454 	{
455 	  do_output_2(0, "error: grid param maxerror = %f", new_maxerror);
456 	  return -1;
457 	}
458       global_maxerror = new_maxerror;
459       make_float_string(ss, global_maxerror);
460       do_output_2(2, "grid parameter maxerror = %s", ss);
461       return 0;
462     }
463   if(strcmp(paramName, "output") == 0)
464     {
465       global_outputLevel = atoi(paramValueString);
466       do_output_2(2, "grid parameter output   = %i", global_outputLevel);
467       return 0;
468     }
469   if(strcmp(paramName, "nfreeze") == 0)
470     {
471       integer new_nFreeze = atoi(paramValueString);
472       if(new_nFreeze <= 0)
473 	{
474 	  do_output_2(0, "error: grid param nfreeze = %f", new_nFreeze);
475 	  return -1;
476 	}
477       global_nFreeze = new_nFreeze;
478       do_output_2(2, "grid parameter nfreeze  = %i", global_nFreeze);
479       return 0;
480     }
481   if(strcmp(paramName, "nthreads") == 0)
482     {
483       integer new_nThreads = atoi(paramValueString);
484       if(new_nThreads <= 0)
485 	{
486 	  do_output_2(0, "error: grid param nthreads = %f", new_nThreads);
487 	  return -1;
488 	}
489       global_nThreads = new_nThreads;
490       do_output_2(2, "grid parameter nthreads = %i", global_nThreads);
491       return 0;
492     }
493   if(strcmp(paramName, "dotest") == 0)
494     {
495       global_doTestIntegration = atoi(paramValueString);
496       do_output_2(2, "grid parameter dotest   = %i\n",
497 		  global_doTestIntegration);
498       return 0;
499     }
500   do_output_2(0, "error in grid input: unknown parameter '%s'", paramName);
501   return -1;
502 }
503 
504 /* dftcartesianinput: called from Fortran code to allow setting different
505  * parameters of the cubature code. */
506 void
dftcartesianinput_(const char * line,integer line_len)507 dftcartesianinput_(const char *line, integer line_len)
508 {
509 #define MAX_BYTES 222
510   integer inperr;
511   integer* inperrPtr = &inperr;
512   char *endPtr, *p;
513   char line2[MAX_BYTES];
514 
515   do_output_2(1, "dftcartesianinput, line_len = %i", line_len);
516   if(line_len < 0)
517     return;
518   memset(line2, 0, MAX_BYTES);
519   memcpy(line2, line, line_len);
520   line2[line_len] = 0;
521   /*printf("dftgridparams_ len = %i\n", line_len); */
522   /*printf("line after cutting at line_len:\n'%s'\n", line2); */
523   endPtr = line2 + line_len;
524   p = line2;
525   while(p < endPtr)
526     {
527       char paramBuf[MAX_BYTES];
528       char* q;
529         /* skip spaces */
530       while(*p == ' ')
531 	p++;
532       if(*p == 0)
533 	break;
534       /* now we are at the beginning of some string */
535       q = p;
536       while((*q != ' ') && (*q != 0))
537 	q++;
538       memcpy(paramBuf, p, q-p);
539       paramBuf[q-p] = 0;
540       if(parseParam(paramBuf) != 0)
541 	*inperrPtr++;
542       p = q;
543     }
544 }
545 
546 
547 static void
print_box(BoxStruct * box,integer prio)548 print_box(BoxStruct* box, integer prio)
549 {
550   integer i;
551   do_output_2(prio, "print_box:");
552   for(i = 0; i < NO_OF_DIMENSIONS; i++)
553     {
554       do_output_2(prio, "min = %.11f   max = %.11f",
555 		  (double)box->min[i], (double)box->max[i]);
556     } /* END FOR i */
557 }
558 
559 
560 static integer
get_distribution_box(BoxStruct * box,DistributionSpecStruct * distr,real targetRhoError)561 get_distribution_box(BoxStruct* box,
562 		     DistributionSpecStruct* distr,
563 		     real targetRhoError)
564 {
565   real targetError, r1, extent, arg;
566   integer i;
567   targetError = targetRhoError;
568   arg = distr->coeff / targetError;
569   if(arg < 0) arg *= -1;
570   if(arg < 1e-30)
571     {
572       do_output_2(0, "error in get_distribution_box: (arg == 0)");
573       return -1;
574     }
575   r1 = log(arg);
576   if(r1 < 0) r1 *= -1;
577   extent = sqrt(r1 / distr->exponent);
578   for(i = 0; i < NO_OF_DIMENSIONS; i++)
579     {
580       box->min[i] = distr->centerCoords[i] - extent;
581       box->max[i] = distr->centerCoords[i] + extent;
582     } /* END FOR i */
583   return 0;
584 } /* END get_distribution_box */
585 
586 static integer
get_shell_box(BoxStruct * box,ShellSpecStruct * shell)587 get_shell_box(BoxStruct* box, ShellSpecStruct* shell)
588 {
589   integer i;
590   for(i = 0; i < NO_OF_DIMENSIONS; i++)
591     {
592       box->min[i] = shell->centerCoords[i] - shell->extent;
593       box->max[i] = shell->centerCoords[i] + shell->extent;
594     } /* END FOR i */
595   return 0;
596 } /* END get_shell_box */
597 
598 
599 
600 
601 static real
compute_value_at_point(DensitySpecStruct * density,integer noOfNonzeroShells,integer * nonZeroShellsIndexList,integer noOfNonzeroBasFuncs,integer * nonZeroBasFuncsIndexList,real (* coor)[3],real * workList)602 compute_value_at_point(
603 		       DensitySpecStruct* density,
604 		       integer noOfNonzeroShells,
605 		       integer* nonZeroShellsIndexList,
606 		       integer noOfNonzeroBasFuncs,
607 		       integer* nonZeroBasFuncsIndexList,
608 		       real (*coor)[3],
609 		       real* workList)
610 {
611   ShellSpecStruct* currShell;
612   integer i, j, iIndex, jIndex, symmetryFactor, count;
613   real expFactor, result, currivalue;
614   real xdiff, ydiff, zdiff;
615   real x0, y0, z0;
616   real x2, y2, z2, r2;
617   integer nbast;
618   const real* dmat;
619 
620   nbast = density->nbast;
621   dmat = density->dmat;
622 
623   if(noOfNonzeroBasFuncs > nbast)
624     {
625       do_error_exit("error in compute_integral_from_points: "
626 		    "(noOfNonzeroBasFuncs > nbast)\n");
627       return 0;
628     }
629 
630   /* compute values of contracted distributions at given point */
631   count = 0;
632   for(i = 0; i < noOfNonzeroShells; i++)
633     {
634       currShell = &density->shellList[nonZeroShellsIndexList[i]];
635       x0 = currShell->centerCoords[0];
636       y0 = currShell->centerCoords[1];
637       z0 = currShell->centerCoords[2];
638 
639       xdiff = coor[0][0] - x0;
640       ydiff = coor[0][1] - y0;
641       zdiff = coor[0][2] - z0;
642       x2 = xdiff * xdiff;
643       y2 = ydiff * ydiff;
644       z2 = zdiff * zdiff;
645       r2 = x2 + y2 + z2;
646 
647       /* compute expFactor (this is the same procedure for all shell types) */
648       expFactor = 0;
649       for(j = 0; j < currShell->noOfContr; j++)
650 	  expFactor += currShell->coeffList[j] *
651 	    exp(-currShell->exponentList[j] * r2);
652       /* OK, expFactor computed */
653 
654       /* now there will be a different number of entries  */
655       /* depending on shell type */
656       switch(currShell->shellType)
657 	{
658 	case 0:
659             /* 's' type shell, 1 function */
660 	  workList[count] = expFactor *
661 	    solid_harmonic_s_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
662 	  break;
663 	case 1:
664             /* 'p' type shell, 3 functions */
665 	  workList[count] = expFactor *
666 	    solid_harmonic_p_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
667 	  workList[count] = expFactor *
668 	    solid_harmonic_p_1(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
669 	  workList[count] = expFactor *
670 	    solid_harmonic_p_2(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
671 	  break;
672 	case 2:
673             /* 'd' type shell, 5 functions */
674 	  workList[count] = expFactor *
675 	    solid_harmonic_d_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
676 	  workList[count] = expFactor *
677 	    solid_harmonic_d_1(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
678 	  workList[count] = expFactor *
679 	    solid_harmonic_d_2(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
680 	  workList[count] = expFactor *
681 	    solid_harmonic_d_3(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
682 	  workList[count] = expFactor *
683 	    solid_harmonic_d_4(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
684 	  break;
685 	case 3:
686             /* 'f' type shell, 7 functions */
687 	  workList[count] = expFactor *
688 	    solid_harmonic_f_0(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
689 	  workList[count] = expFactor *
690 	    solid_harmonic_f_1(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
691 	  workList[count] = expFactor *
692 	    solid_harmonic_f_2(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
693 	  workList[count] = expFactor *
694 	    solid_harmonic_f_3(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
695 	  workList[count] = expFactor *
696 	    solid_harmonic_f_4(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
697 	  workList[count] = expFactor *
698 	    solid_harmonic_f_5(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
699 	  workList[count] = expFactor *
700 	    solid_harmonic_f_6(xdiff, ydiff, zdiff, x2, y2, z2, r2); count++;
701 	  break;
702 	default:
703 	  do_output_2(0, "error in compute_value_at_point_2: "
704 		      "only spdf type shells implemented");
705 	  do_output_2(0, "currShell->shellType = %i\n", currShell->shellType);
706 	  do_error_exit("error in compute_value_at_point_2");
707 	  return -1;
708 	} /* END SWITCH shellType       */
709     } /* END FOR i (for each shell) */
710 
711   if(count > nbast)
712     {
713       do_error_exit("error in compute_value_at_point: (count > nbast)");
714       return -1;
715     }
716 
717   /* now use density matrix to obtain final result */
718   result = 0;
719   for(i = 0; i < noOfNonzeroBasFuncs; i++)
720     {
721       currivalue = workList[i];
722       iIndex = nonZeroBasFuncsIndexList[i];
723       for(j = i; j < noOfNonzeroBasFuncs; j++)
724 	{
725 	  if(j == i)
726 	    symmetryFactor = 1;
727 	  else
728 	    symmetryFactor = 2;
729 	  jIndex = nonZeroBasFuncsIndexList[j];
730 	  result += symmetryFactor * dmat[iIndex*nbast+jIndex] *
731 	    currivalue * workList[j];
732 	} /* END FOR j */
733     } /* END FOR i */
734 
735   return result;
736 } /* END compute_value_at_point */
737 
738 
739 
740 static real
compute_integral_from_points(DensitySpecStruct * density,integer noOfNonzeroShells,integer * nonZeroShellsIndexList,integer noOfNonzeroBasFuncs,integer * nonZeroBasFuncsIndexList,integer nPoints,real (* coor)[3],real * weight,real * workList)741 compute_integral_from_points(
742 			     DensitySpecStruct* density,
743 			     integer noOfNonzeroShells,
744 			     integer* nonZeroShellsIndexList,
745 			     integer noOfNonzeroBasFuncs,
746 			     integer* nonZeroBasFuncsIndexList,
747 			     integer nPoints,
748 			     real (*coor)[3],
749 			     real* weight,
750 			     real* workList)
751 {
752 #if 1
753   integer i;
754   real sum;
755   sum = 0;
756   for(i = 0; i < nPoints; i++)
757     {
758       sum += compute_value_at_point(density,
759 				    noOfNonzeroShells,
760 				    nonZeroShellsIndexList,
761 				    noOfNonzeroBasFuncs,
762 				    nonZeroBasFuncsIndexList,
763 				    &coor[i],
764 				    workList) * weight[i];
765     } /* END FOR i */
766   return sum;
767 #else
768   ShellSpecStruct* currShell;
769   integer i, j, iIndex, jIndex, symmetryFactor, count, pointNo;
770   real expFactor, result;
771   real xdiff, ydiff, zdiff;
772   real x2, y2, z2, r2;
773   real x0, y0, z0;
774   real sum;
775   integer savedCount;
776   integer nbast;
777   const real* dmat;
778 
779   nbast = density->nbast;
780   dmat = density->dmat;
781 
782   if(noOfNonzeroBasFuncs > nbast)
783     {
784       do_error_exit("error in compute_integral_from_points: "
785 		    "(noOfNonzeroBasFuncs > nbast)\n");
786       return 0;
787     }
788 
789   if(nPoints > MAX_NO_OF_POINTS_PER_BATCH)
790     {
791       do_error_exit("error in compute_integral_from_points: "
792 		    "(nPoints > MAX_NO_OF_POINTS_PER_BATCH)\n");
793       return 0;
794     }
795 
796   memset(workList, 0x00 ,noOfNonzeroBasFuncs * sizeof(real));
797 
798   /* compute values of contracted distributions at given point */
799   count = 0;
800 
801   for(i = 0; i < noOfNonzeroShells; i++)
802     {
803       currShell = &density->shellList[nonZeroShellsIndexList[i]];
804       x0 = currShell->centerCoords[0];
805       y0 = currShell->centerCoords[1];
806       z0 = currShell->centerCoords[2];
807 
808       savedCount = count;;
809       for(pointNo = 0; pointNo < nPoints; pointNo++)
810 	{
811 	  count = savedCount;
812 
813 	  xdiff = coor[pointNo][0] - x0;
814 	  ydiff = coor[pointNo][1] - y0;
815 	  zdiff = coor[pointNo][2] - z0;
816 	  x2 = xdiff * xdiff;
817 	  y2 = ydiff * ydiff;
818 	  z2 = zdiff * zdiff;
819 	  r2 = x2 + y2 + z2;
820 
821 	  /* compute expFactor (this is the same procedure for all
822            * shell types) */
823 	  expFactor = 0;
824 	  for(j = 0; j < currShell->noOfContr; j++)
825 	      expFactor += currShell->coeffList[j] *
826 		exp(-currShell->exponentList[j] * r2);
827 	  /* OK, expFactor computed */
828 	  /*printf("expFactor = %.22f\n", expFactor); */
829 
830 	  /* now there will be a different number of entries  */
831 	  /* depending on shell type */
832 	  switch(currShell->shellType)
833 	    {
834 	    case 0:
835                 /* 's' type shell, 1 function */
836 	      workList[count*nPoints+pointNo] = expFactor *
837 		solid_harmonic_s_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
838 	      count++;
839 	      break;
840 	    case 1:
841                 /* 'p' type shell, 3 functions */
842 	      workList[count*nPoints+pointNo] = expFactor *
843 		solid_harmonic_p_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
844 	      count++;
845 	      workList[count*nPoints+pointNo] = expFactor *
846 		solid_harmonic_p_1(xdiff, ydiff, zdiff, x2, y2, z2, r2);
847 	      count++;
848 	      workList[count*nPoints+pointNo] = expFactor *
849 		solid_harmonic_p_2(xdiff, ydiff, zdiff, x2, y2, z2, r2);
850 	      count++;
851 	      break;
852 	    case 2:
853                 /* 'd' type shell, 5 functions */
854 	      workList[count*nPoints+pointNo] = expFactor *
855 		solid_harmonic_d_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
856 	      count++;
857 	      workList[count*nPoints+pointNo] = expFactor *
858 		solid_harmonic_d_1(xdiff, ydiff, zdiff, x2, y2, z2, r2);
859 	      count++;
860 	      workList[count*nPoints+pointNo] = expFactor *
861 		solid_harmonic_d_2(xdiff, ydiff, zdiff, x2, y2, z2, r2);
862 	      count++;
863 	      workList[count*nPoints+pointNo] = expFactor *
864 		solid_harmonic_d_3(xdiff, ydiff, zdiff, x2, y2, z2, r2);
865 	      count++;
866 	      workList[count*nPoints+pointNo] = expFactor *
867 		solid_harmonic_d_4(xdiff, ydiff, zdiff, x2, y2, z2, r2);
868 	      count++;
869 	      break;
870 	    case 3:
871                 /* 'f' type shell, 7 functions */
872 	      workList[count*nPoints+pointNo] = expFactor *
873 		solid_harmonic_f_0(xdiff, ydiff, zdiff, x2, y2, z2, r2);
874 	      count++;
875 	      workList[count*nPoints+pointNo] = expFactor *
876 		solid_harmonic_f_1(xdiff, ydiff, zdiff, x2, y2, z2, r2);
877 	      count++;
878 	      workList[count*nPoints+pointNo] = expFactor *
879 		solid_harmonic_f_2(xdiff, ydiff, zdiff, x2, y2, z2, r2);
880 	      count++;
881 	      workList[count*nPoints+pointNo] = expFactor *
882 		solid_harmonic_f_3(xdiff, ydiff, zdiff, x2, y2, z2, r2);
883 	      count++;
884 	      workList[count*nPoints+pointNo] = expFactor *
885 		solid_harmonic_f_4(xdiff, ydiff, zdiff, x2, y2, z2, r2);
886 	      count++;
887 	      workList[count*nPoints+pointNo] = expFactor *
888 		solid_harmonic_f_5(xdiff, ydiff, zdiff, x2, y2, z2, r2);
889 	      count++;
890 	      workList[count*nPoints+pointNo] = expFactor *
891 		solid_harmonic_f_6(xdiff, ydiff, zdiff, x2, y2, z2, r2);
892 	      count++;
893 	      break;
894 	    default:
895 	      do_output_2(0, "error in compute_integral_from_points: "
896 			  "only spdf type shells implemented");
897 	      do_output_2(0, "currShell->shellType = %i\n",
898 			  currShell->shellType);
899 	      do_error_exit("error in compute_integral_from_points");
900 	      return -1;
901 	    } /* END SWITCH shellType */
902 
903 	} /* END FOR pointNo */
904 
905     } /* END FOR i (for each shell) */
906 
907   if(count != noOfNonzeroBasFuncs)
908     {
909       do_error_exit("error in compute_integral_from_points: "
910 		    "(count != noOfNonzeroBasFuncs)");
911       return -1;
912     }
913 
914   /* now use density matrix to obtain final result */
915   result = 0;
916   for(i = 0; i < noOfNonzeroBasFuncs; i++)
917     {
918       iIndex = nonZeroBasFuncsIndexList[i];
919       for(j = i; j < noOfNonzeroBasFuncs; j++)
920 	{
921 	  if(j == i)
922 	    symmetryFactor = 1;
923 	  else
924 	    symmetryFactor = 2;
925 	  jIndex = nonZeroBasFuncsIndexList[j];
926 	  /*real *restrict ci = &workList[i*nPoints]; */
927 	  /*real *restrict cj = &workList[j*nPoints]; */
928 	  /*real *restrict wt = weight; */
929 	  sum = 0;
930 	  for(pointNo = 0; pointNo < nPoints; pointNo++)
931 	    {
932 	      sum += workList[i*nPoints+pointNo] *
933 		workList[j*nPoints+pointNo] * weight[pointNo];
934 	      /*sum += ci[pointNo] * cj[pointNo] * wt[pointNo]; */
935 	    } /* END FOR pointNo */
936 
937 	  result += symmetryFactor * dmat[iIndex*nbast+jIndex] * sum;
938 	} /* END FOR j */
939     } /* END FOR i */
940 
941   return result;
942 #endif
943 } /* END compute_integral_from_points */
944 
945 
946 
947 static real
to_power(real x,integer n)948 to_power(real x, integer n)
949 {
950   real result;
951   integer i;
952   result = 1;
953   for(i = 0; i < n; i++)
954     result *= x;
955   return result;
956 }
957 
958 
959 static real
compute_1d_gaussian_integral_recursive(real a,real b,integer n,real alpha)960 compute_1d_gaussian_integral_recursive(real a, real b, integer n, real alpha)
961 {
962   real result, sqrtalpha, term1, term2;
963   real aToPowerNminus1, bToPowerNminus1;
964   if(n == 0)
965     {
966       sqrtalpha = sqrt(alpha);
967       result = sqrt(CONSTPI/(4*alpha)) * (erf(sqrtalpha*b) - erf(sqrtalpha*a));
968       return result;
969     }
970   if(n == 1)
971     {
972       result = -(1 / (2*alpha)) * (exp(-alpha*b*b) - exp(-alpha*a*a));
973       return result;
974     }
975   if(n < 0)
976     {
977       do_output_2(0, "error in 1dintegral: n < 0");
978       return 0;
979     }
980   /* now we know that n >= 2 */
981   term1 = (n - 1) * compute_1d_gaussian_integral_recursive(a, b, n-2, alpha);
982   aToPowerNminus1 = to_power(a, n-1);
983   bToPowerNminus1 = to_power(b, n-1);
984   term2  =
985     bToPowerNminus1 * exp(-alpha*b*b) -
986     aToPowerNminus1 * exp(-alpha*a*a);
987   result = (term1 - term2) / (2 * alpha);
988   /*  return 0; */
989   return result;
990 } /* END compute_1d_gaussian_integral_recursive */
991 
992 
993 
994 static real
compute_1d_gaussian_integral(real a,real b,integer n,real alpha)995 compute_1d_gaussian_integral(real a, real b, integer n, real alpha)
996 {
997   real result, sqrtalpha, term1, term2;
998   return compute_1d_gaussian_integral_recursive(a, b, n, alpha);
999   result = 0;
1000   switch(n)
1001     {
1002     case 0:
1003       sqrtalpha = sqrt(alpha);
1004       result = sqrt(CONSTPI/(4*alpha)) * (erf(sqrtalpha*b) - erf(sqrtalpha*a));
1005       break;
1006     case 1:
1007       result = -(1 / (2*alpha)) * (exp(-alpha*b*b) - exp(-alpha*a*a));
1008       break;
1009     case 2:
1010       sqrtalpha = sqrt(alpha);
1011       term1 =
1012 	sqrt(CONSTPI/(16*alpha*alpha*alpha)) *
1013 	(erf(sqrtalpha*b) - erf(sqrtalpha*a));
1014       term2 = -(1 / (2 * alpha)) * (b*exp(-alpha*b*b) - a*exp(-alpha*a*a));
1015       result = term1 + term2;
1016       break;
1017     case 3:
1018       result = -(1 / (2*alpha*alpha)) * ((1+alpha*b*b)*exp(-alpha*b*b) -
1019 					 (1+alpha*a*a)*exp(-alpha*a*a));
1020       break;
1021     default:
1022       compute_1d_gaussian_integral_recursive(a, b, n, alpha);
1023       break;
1024     } /* END SWITCH n */
1025   return result;
1026 } /* END compute_1d_gaussian_integral */
1027 
1028 
1029 static real
compute_integral_over_box(DistributionSpecStruct * distr,BoxStruct * box)1030 compute_integral_over_box(DistributionSpecStruct* distr, BoxStruct* box)
1031 {
1032   real result, a, b, alpha;
1033   integer i, n;
1034   result = distr->coeff;
1035   alpha = distr->exponent;
1036   for(i = 0; i < NO_OF_DIMENSIONS; i++)
1037     {
1038       n = distr->monomialInts[i];
1039       a = box->min[i] - distr->centerCoords[i];
1040       b = box->max[i] - distr->centerCoords[i];
1041       result *= compute_1d_gaussian_integral(a, b, n, alpha);
1042     } /* END FOR i */
1043   return result;
1044 } /* END compute_integral_over_box */
1045 
1046 
1047 static integer
get_distrs_for_box(integer * resultList,rhoTreeNode * node,BoxStruct * inputBoxPtr)1048 get_distrs_for_box(integer* resultList, rhoTreeNode* node, BoxStruct* inputBoxPtr)
1049 {
1050 #define MAX_DEPTH 888
1051   integer n, i, overlap, currDepth;
1052   rhoTreeNode* nodeList[MAX_DEPTH];
1053   integer statusList[MAX_DEPTH];
1054   rhoTreeNode* currNode;
1055   BoxStruct box;
1056   BoxStruct* currBox;
1057 
1058   memcpy(&box, inputBoxPtr, sizeof(BoxStruct));
1059 
1060   n = 0;
1061   currDepth = 0;
1062   nodeList[0] = node;
1063   statusList[0] = 0;
1064   while(currDepth >= 0)
1065     {
1066       if(statusList[currDepth] == 2)
1067 	currDepth--;
1068       else
1069 	{
1070 
1071 	  currNode = nodeList[currDepth];
1072 	  currBox = &currNode->box;
1073 
1074 	  /* check for box overlap */
1075 	  overlap = 1;
1076 	  for(i = 0; i < NO_OF_DIMENSIONS; i++)
1077 	    {
1078 	      if(currBox->min[i] > box.max[i])
1079 		overlap = 0;
1080 	      if(currBox->max[i] < box.min[i])
1081 		overlap = 0;
1082 	    } /* END FOR i */
1083 	  if(overlap == 0)
1084 	    currDepth--;
1085 	  else
1086 	    {
1087 
1088 	      if(statusList[currDepth] == 0)
1089 		{
1090 		  if(currNode->distrIndex >= 0)
1091 		    {
1092 		      resultList[n] = currNode->distrIndex;
1093 		      n++;
1094 		      currDepth--;
1095 		    }
1096 		  else
1097 		    {
1098 		      statusList[currDepth] = 1;
1099 		      currDepth++;
1100 		      statusList[currDepth] = 0;
1101 		      nodeList[currDepth] = currNode->child1;
1102 		    }
1103 		} /* END IF status 0 */
1104 	      else
1105 		{
1106                     /* status is 1 */
1107 		  statusList[currDepth] = 2;
1108 		  currDepth++;
1109 		  statusList[currDepth] = 0;
1110 		  nodeList[currDepth] = currNode->child2;
1111 		} /* END ELSE status 1 */
1112 	    }
1113 	}
1114     } /* END WHILE (currDepth >= 0) */
1115 
1116   return n;
1117 } /* END get_distrs_for_box */
1118 
1119 static integer
use_cubature_rule(integer maxlen,real (* coor)[3],real * weight,BoxStruct * box,integer ruleNumber)1120 use_cubature_rule(integer maxlen,
1121 		  real (*coor)[3],
1122 		  real *weight,
1123 		  BoxStruct* box,
1124 		  integer ruleNumber)
1125 {
1126   real volume, diff0, diff1, diff2;
1127   real c0, c1, c2, a, b;
1128   real currCoords[3];
1129   integer Ngrid, currIndex;
1130   integer i, j, k, ii;
1131   real a0, a1, a2;
1132 
1133   volume = 1;
1134   for(i = 0; i < NO_OF_DIMENSIONS; i++)
1135     volume *= (box->max[i] - box->min[i]);
1136 
1137   switch(ruleNumber)
1138     {
1139     case 1: /* single point in center of box */
1140       Ngrid = 1;
1141       if(Ngrid >= maxlen)
1142 	{
1143 	  do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1144 	  return -1;
1145 	}
1146       for(i = 0; i < NO_OF_DIMENSIONS; i++)
1147 	  coor[0][i] = (box->max[i] + box->min[i]) / 2;
1148       weight[0] = volume;
1149       break;
1150     case 2: /* eight points towards corners of box */
1151       Ngrid = 8;
1152       if(Ngrid >= maxlen)
1153 	{
1154 	  do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1155 	  return -1;
1156 	}
1157       for(i = 0; i < Ngrid; i++)
1158 	weight[i] = volume / 8;
1159       diff0 = box->max[0] - box->min[0];
1160       diff1 = box->max[1] - box->min[1];
1161       diff2 = box->max[2] - box->min[2];
1162       currIndex = 0;
1163       for(i = 0; i < 2; i++)
1164 	{
1165 	  currCoords[0] = box->min[0] + 0.25*diff0 + 0.5*diff0*i;
1166 	  for(j = 0; j < 2; j++)
1167 	    {
1168 	      currCoords[1] = box->min[1] + 0.25*diff1 + 0.5*diff1*i;
1169 	      for(k = 0; k < 2; k++)
1170 		{
1171 		  currCoords[2] = box->min[2] + 0.25*diff2 + 0.5*diff2*i;
1172 		  for(ii = 0; ii < 3; ii++)
1173 		    {
1174 		      coor[currIndex][ii] = currCoords[ii];
1175 		    } /* END FOR ii */
1176 		  currIndex++;
1177 		} /* END FOR k */
1178 	    } /* END FOR j */
1179 	} /* END FOR i */
1180       break;
1181     case 3: /* 14 point, degree 5 rule (Stroud 1971) */
1182       Ngrid = 14;
1183       if(Ngrid >= maxlen)
1184 	{
1185 	  do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1186 	  return -1;
1187 	}
1188       for(i = 0; i < 6; i++)
1189 	weight[i] = volume *  0.88642659279778393 / 8;
1190       for(i = 6; i < 14; i++)
1191 	weight[i] = volume * 0.33518005540166204 / 8;
1192       diff0 = box->max[0] - box->min[0];
1193       diff1 = box->max[1] - box->min[1];
1194       diff2 = box->max[2] - box->min[2];
1195       c0 = (box->max[0] + box->min[0]) / 2;
1196       c1 = (box->max[1] + box->min[1]) / 2;
1197       c2 = (box->max[2] + box->min[2]) / 2;
1198       a = 0.79582242575422146 * 0.5;
1199       b = 0.75878691063932814 * 0.5;
1200 
1201 #define MACRO_3VECT(v,x,y,z) v[0]=x; v[1]=y; v[2]=z;
1202 
1203       MACRO_3VECT(coor[0], c0-a*diff0, c1,         c2        );
1204       MACRO_3VECT(coor[1], c0+a*diff0, c1,         c2        );
1205       MACRO_3VECT(coor[2], c0        , c1-a*diff1, c2        );
1206       MACRO_3VECT(coor[3], c0        , c1+a*diff1, c2        );
1207       MACRO_3VECT(coor[4], c0        , c1        , c2-a*diff2);
1208       MACRO_3VECT(coor[5], c0        , c1        , c2+a*diff2);
1209 
1210       MACRO_3VECT(coor[ 6], c0-b*diff0, c1-b*diff1, c2-b*diff2);
1211       MACRO_3VECT(coor[ 7], c0-b*diff0, c1-b*diff1, c2+b*diff2);
1212       MACRO_3VECT(coor[ 8], c0-b*diff0, c1+b*diff1, c2-b*diff2);
1213       MACRO_3VECT(coor[ 9], c0-b*diff0, c1+b*diff1, c2+b*diff2);
1214       MACRO_3VECT(coor[10], c0+b*diff0, c1-b*diff1, c2-b*diff2);
1215       MACRO_3VECT(coor[11], c0+b*diff0, c1-b*diff1, c2+b*diff2);
1216       MACRO_3VECT(coor[12], c0+b*diff0, c1+b*diff1, c2-b*diff2);
1217       MACRO_3VECT(coor[13], c0+b*diff0, c1+b*diff1, c2+b*diff2);
1218 
1219       break;
1220 
1221     case 4: /* 25 point, degree 5 rule (Stroud 1971) */
1222       Ngrid = 25;
1223       if(Ngrid >= maxlen)
1224 	{
1225 	  do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1226 	  return -1;
1227 	}
1228       weight[0] = volume * 1.6842105263157894 / 8;
1229       for(i = 1; i < 25; i++)
1230 	weight[i] = volume *  0.26315789473684210 / 8;
1231 
1232       diff0 = box->max[0] - box->min[0];
1233       diff1 = box->max[1] - box->min[1];
1234       diff2 = box->max[2] - box->min[2];
1235       c0 = (box->max[0] + box->min[0]) / 2;
1236       c1 = (box->max[1] + box->min[1]) / 2;
1237       c2 = (box->max[2] + box->min[2]) / 2;
1238       a = 0.47800981191507060 * 0.5;
1239       b = 0.89982215247931316 * 0.5;
1240 
1241       MACRO_3VECT(coor[0], c0, c1, c2);
1242 
1243       ii = 1;
1244       a0 = a;
1245       a1 = a;
1246       a2 = b;
1247       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1248       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1249       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1250       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1251       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1252       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1253       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1254       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1255       a0 = a;
1256       a1 = b;
1257       a2 = a;
1258       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1259       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1260       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1261       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1262       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1263       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1264       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1265       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1266       a0 = b;
1267       a1 = a;
1268       a2 = a;
1269       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1270       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1271       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1272       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1273       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1274       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1275       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1276       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1277 
1278       break;
1279 
1280     case 5: /* 27 point, degree 7 rule (Stroud 1971) */
1281       Ngrid = 27;
1282       if(Ngrid >= maxlen)
1283 	{
1284 	  do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1285 	  return -1;
1286 	}
1287       weight[0] = volume * 0.78807348274421057 / 8;
1288       for(i = 1; i < 7; i++)
1289 	weight[i] = volume *  0.49936900230772032 / 8;
1290       for(i = 7; i < 19; i++)
1291 	weight[i] = volume *  0.032303742334037395 / 8;
1292       for(i = 19; i < 27; i++)
1293 	weight[i] = volume *  0.47850844942512734 / 8;
1294 
1295       diff0 = box->max[0] - box->min[0];
1296       diff1 = box->max[1] - box->min[1];
1297       diff2 = box->max[2] - box->min[2];
1298       c0 = (box->max[0] + box->min[0]) / 2;
1299       c1 = (box->max[1] + box->min[1]) / 2;
1300       c2 = (box->max[2] + box->min[2]) / 2;
1301 
1302       MACRO_3VECT(coor[0], c0, c1, c2);
1303       a = 0.84841801147225245 * 0.5;
1304       ii = 1;
1305       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2        ); ii++;
1306       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2        ); ii++;
1307       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2        ); ii++;
1308       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2        ); ii++;
1309       MACRO_3VECT(coor[ii], c0        , c1        , c2-a*diff2); ii++;
1310       MACRO_3VECT(coor[ii], c0        , c1        , c2+a*diff2); ii++;
1311       a = 1.1064128986267175 * 0.5;
1312       MACRO_3VECT(coor[ii], c0-a*diff0, c1-a*diff1, c2        ); ii++;
1313       MACRO_3VECT(coor[ii], c0-a*diff0, c1+a*diff1, c2        ); ii++;
1314       MACRO_3VECT(coor[ii], c0+a*diff0, c1-a*diff1, c2        ); ii++;
1315       MACRO_3VECT(coor[ii], c0+a*diff0, c1+a*diff1, c2        ); ii++;
1316       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2-a*diff2); ii++;
1317       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2+a*diff2); ii++;
1318       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2-a*diff2); ii++;
1319       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2+a*diff2); ii++;
1320       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2-a*diff2); ii++;
1321       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2+a*diff2); ii++;
1322       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2-a*diff2); ii++;
1323       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2+a*diff2); ii++;
1324       a0 = a1 = a2 = 0.65281647210169120 * 0.5;
1325       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1326       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1327       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1328       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1329       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1330       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1331       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1332       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1333 
1334       break;
1335 
1336     case 6: /* 32 point, degree 7 rule (Beckers 1992) */
1337       Ngrid = 32;
1338       if(Ngrid >= maxlen)
1339 	{
1340 	  do_output_2(0, "error in use_cubature_rule: (Ngrid >= maxlen)");
1341 	  return -1;
1342 	}
1343       for(i = 0; i < 6; i++)
1344 	weight[i] = volume * 0.14098806933910446 / 8;
1345       for(i = 6; i < 12; i++)
1346 	weight[i] = volume * 0.53332245896607639 / 8;
1347       for(i = 12; i < 24; i++)
1348 	weight[i] = volume *  0.049451452995044458 / 8;
1349       for(i = 24; i < 32; i++)
1350 	weight[i] = volume *  0.42008992427854766 / 8;
1351 
1352       diff0 = box->max[0] - box->min[0];
1353       diff1 = box->max[1] - box->min[1];
1354       diff2 = box->max[2] - box->min[2];
1355       c0 = (box->max[0] + box->min[0]) / 2;
1356       c1 = (box->max[1] + box->min[1]) / 2;
1357       c2 = (box->max[2] + box->min[2]) / 2;
1358 
1359       ii = 0;
1360 
1361       a = 1.0 * 0.5;
1362       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2        ); ii++;
1363       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2        ); ii++;
1364       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2        ); ii++;
1365       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2        ); ii++;
1366       MACRO_3VECT(coor[ii], c0        , c1        , c2-a*diff2); ii++;
1367       MACRO_3VECT(coor[ii], c0        , c1        , c2+a*diff2); ii++;
1368       a = 0.66289786904352112 * 0.5;
1369       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2        ); ii++;
1370       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2        ); ii++;
1371       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2        ); ii++;
1372       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2        ); ii++;
1373       MACRO_3VECT(coor[ii], c0        , c1        , c2-a*diff2); ii++;
1374       MACRO_3VECT(coor[ii], c0        , c1        , c2+a*diff2); ii++;
1375       a = 1.0306143700994171 * 0.5;
1376       MACRO_3VECT(coor[ii], c0-a*diff0, c1-a*diff1, c2        ); ii++;
1377       MACRO_3VECT(coor[ii], c0-a*diff0, c1+a*diff1, c2        ); ii++;
1378       MACRO_3VECT(coor[ii], c0+a*diff0, c1-a*diff1, c2        ); ii++;
1379       MACRO_3VECT(coor[ii], c0+a*diff0, c1+a*diff1, c2        ); ii++;
1380       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2-a*diff2); ii++;
1381       MACRO_3VECT(coor[ii], c0-a*diff0, c1        , c2+a*diff2); ii++;
1382       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2-a*diff2); ii++;
1383       MACRO_3VECT(coor[ii], c0+a*diff0, c1        , c2+a*diff2); ii++;
1384       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2-a*diff2); ii++;
1385       MACRO_3VECT(coor[ii], c0        , c1-a*diff1, c2+a*diff2); ii++;
1386       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2-a*diff2); ii++;
1387       MACRO_3VECT(coor[ii], c0        , c1+a*diff1, c2+a*diff2); ii++;
1388       a0 = a1 = a2 = 0.66713797405746656 * 0.5;
1389       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1390       MACRO_3VECT(coor[ii], c0-a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1391       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1392       MACRO_3VECT(coor[ii], c0-a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1393       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2-a2*diff2); ii++;
1394       MACRO_3VECT(coor[ii], c0+a0*diff0, c1-a1*diff1, c2+a2*diff2); ii++;
1395       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2-a2*diff2); ii++;
1396       MACRO_3VECT(coor[ii], c0+a0*diff0, c1+a1*diff1, c2+a2*diff2); ii++;
1397 
1398       break;
1399 
1400     default:
1401       do_output_2(0, "error: unknown cubature rule");
1402       return -1;
1403     } /* END SWITCH */
1404 
1405   /*
1406   real testSum = 0;
1407   for(i = 0; i < Ngrid; i++)
1408     testSum += weight[i];
1409   printf("testSum = %22.11f\n", testSum);
1410   printf("volume  = %22.11f\n\n", volume);
1411   */
1412 
1413   return Ngrid;
1414 } /* END use_cubature_rule */
1415 
1416 
1417 static integer
compute_grid_for_box(compute_grid_for_box_params_struct * params,integer maxlen,real (* coor)[3],real * weight,BoxStruct * box,real analyticalIntegralValue,real * workList,real * totalIntegralResult)1418 compute_grid_for_box(compute_grid_for_box_params_struct* params,
1419 		     integer maxlen,
1420 		     real (*coor)[3],
1421 		     real *weight,
1422 		     BoxStruct* box,
1423 		     real analyticalIntegralValue,
1424 		     real* workList,
1425 		     real* totalIntegralResult)
1426 {
1427 #define MAX_NO_OF_TEST_POINTS 88
1428 
1429   integer noOfGridPoints;
1430   integer Ngrid;
1431   integer i;
1432   real Iapprox, Iexact;
1433   BoxStruct box1;
1434   BoxStruct box2;
1435   integer bestcoord, nPoints1, nPoints2;
1436   real abserror, dist, maxdist, halfway;
1437   real IexactAbs;
1438   real analyticalIntegralBox1, analyticalIntegralBox2;
1439   integer splitBox;
1440 
1441   /*return 0; */
1442 
1443   /* Define Ngrid points inside box, with corresponding weights */
1444   /* this is where the 'cubature rule' is used */
1445   Ngrid = use_cubature_rule(maxlen, coor, weight, box, CUBATURE_RULE);
1446   if(Ngrid <= 0)
1447     {
1448       do_output_2(0, "error in use_cubature_rule");
1449       return -1;
1450     }
1451   noOfGridPoints = Ngrid;
1452 
1453   /* compute approximate and exact integrals for box */
1454 
1455   Iapprox = 0;
1456 
1457   Iapprox = compute_integral_from_points(
1458 					 &params->density,
1459 					 params->noOfNonzeroShells,
1460 					 params->nonZeroShellsIndexList,
1461 					 params->noOfNonzeroDistributions,
1462 					 params->nonZeroDistrIndexList,
1463 					 Ngrid,
1464 					 &coor[0],
1465 					 weight,
1466 					 workList);
1467 
1468   Iexact = analyticalIntegralValue;
1469 
1470   IexactAbs = Iexact;
1471   if(IexactAbs < 0) IexactAbs *= -1;
1472 
1473   /* compute absolute error */
1474   abserror = Iexact - Iapprox;
1475   if(abserror < 0) abserror *= -1;
1476 
1477   /* check if error is too large */
1478   splitBox = 1;
1479 
1480   if((abserror*100) < params->maxerrorPerBox)
1481     splitBox = 0;
1482 
1483   if((splitBox == 1) && (abserror < params->maxerrorPerBox))
1484     {
1485         /* it seems that the error is small enough. */
1486         /* however, this could be a coincidence. */
1487         /* to check, compare with denser grid */
1488       real testCoor[MAX_NO_OF_TEST_POINTS][3];
1489       real testWeight[MAX_NO_OF_TEST_POINTS];
1490       real testIapprox;
1491       integer Ngrid2, testAbsError;
1492 
1493       Ngrid2 = use_cubature_rule(MAX_NO_OF_TEST_POINTS,
1494 				 testCoor, testWeight, box, CUBATURE_RULE_2);
1495       if(Ngrid2 <= 0)
1496 	{
1497 	  do_output_2(0, "error in use_cubature_rule");
1498 	  return -1;
1499 	}
1500 
1501       testIapprox =
1502 	compute_integral_from_points(
1503 				     &params->density,
1504 				     params->noOfNonzeroShells,
1505 				     params->nonZeroShellsIndexList,
1506 				     params->noOfNonzeroDistributions,
1507 				     params->nonZeroDistrIndexList,
1508 				     Ngrid2,
1509 				     &testCoor[0],
1510 				     testWeight,
1511 				     workList);
1512       testAbsError = fabs(Iexact - testIapprox);
1513       /* we demand that the denser grid should give better result */
1514       /*printf("abserror     = %66.55f\n", abserror); */
1515       /*printf("testAbsError = %66.55f\n\n", testAbsError); */
1516       if(testAbsError <= abserror)
1517 	splitBox = 0;
1518 
1519       /*splitBox = 0; */
1520     }
1521   if(splitBox == 1)
1522     {
1523         /* error too large, split box into box1 and box2 */
1524 
1525         /* first determine in which coordinate direction to do the split */
1526       maxdist = 0;
1527       bestcoord = -1;
1528       for(i = 0; i < NO_OF_DIMENSIONS; i++)
1529 	{
1530 	  dist = box->max[i] - box->min[i];
1531 	  if(dist > maxdist)
1532 	    {
1533 	      maxdist = dist;
1534 	      bestcoord = i;
1535 	    }
1536 	} /* END FOR i */
1537       if(bestcoord < 0)
1538 	return -1;
1539       /* now create new boxes box1 and box2 */
1540       for(i = 0; i < NO_OF_DIMENSIONS; i++)
1541 	{
1542 	  if(i == bestcoord)
1543 	    {
1544                 /* direction of split */
1545 	      halfway = (box->max[i] + box->min[i]) / 2;
1546 	      box1.min[i] = box->min[i];
1547 	      box1.max[i] = halfway;
1548 	      box2.min[i] = halfway;
1549 	      box2.max[i] = box->max[i];
1550 	    }
1551 	  else
1552 	    {
1553                 /* other direction, simply copy bounds */
1554 	      box1.min[i] = box->min[i];
1555 	      box1.max[i] = box->max[i];
1556 	      box2.min[i] = box->min[i];
1557 	      box2.max[i] = box->max[i];
1558 	    }
1559 	} /* END FOR i */
1560       /* now boxes box1 and box2 are now created */
1561 
1562       analyticalIntegralBox1 = 0;
1563       for(i = 0; i < params->density.noOfDistributions; i++)
1564 	analyticalIntegralBox1 +=
1565 	  compute_integral_over_box(&params->density.distrList[i], &box1);
1566 
1567       analyticalIntegralBox2 =
1568 	analyticalIntegralValue - analyticalIntegralBox1;
1569 
1570       /* create grid points for box1 */
1571       nPoints1 = compute_grid_for_box(params,
1572 				      maxlen,
1573 				      coor,
1574 				      weight,
1575 				      &box1,
1576 				      analyticalIntegralBox1,
1577 				      workList,
1578 				      totalIntegralResult);
1579       if(nPoints1 < 0)
1580 	return -1;
1581       /* create grid points for box2 */
1582       nPoints2 = compute_grid_for_box(params,
1583 				      maxlen-nPoints1,
1584 				      &coor[nPoints1],
1585 				      &weight[nPoints1],
1586 				      &box2,
1587 				      analyticalIntegralBox2,
1588 				      workList,
1589 				      totalIntegralResult);
1590       if(nPoints2 < 0)
1591 	return -1;
1592       noOfGridPoints = nPoints1 + nPoints2;
1593     } /* END IF error too large */
1594   else
1595     {
1596         /* error acceptable,  */
1597         /* the computed grid points for this box are good enough. */
1598         /* do nothing more, just return the number of points */
1599       *totalIntegralResult += Iapprox;
1600     }
1601 
1602   return noOfGridPoints;
1603 } /* END compute_grid_for_box */
1604 
1605 
1606 
1607 static rhoTreeNode*
BuildRhoTreeBranch(integer noOfDistributionsTot,DistributionSpecStruct * rho_alt_1,ShellSpecStruct * rho_alt_2,integer distrIndexListN,integer * distrIndexList,real targetRhoError)1608 BuildRhoTreeBranch(integer noOfDistributionsTot,
1609 		   DistributionSpecStruct* rho_alt_1,
1610 		   ShellSpecStruct* rho_alt_2,
1611 		   integer distrIndexListN,
1612 		   integer* distrIndexList,
1613 		   real targetRhoError)
1614 {
1615   rhoTreeNode* newNode;
1616   integer i, j, samePoint, n1, n2, bestCoord;
1617   BoxStruct tempBox;
1618   real currCoord, currDiff, maxDiff, limit, extent1, extent2, testCoord;
1619   rhoTreeNode* child1;
1620   rhoTreeNode* child2;
1621   integer* tempList;
1622   integer tempInt;
1623 
1624   if(distrIndexListN < 1)
1625     {
1626       printf("error in BuildRhoTreeBranch: (distrIndexListN < 1), "
1627 	     "distrIndexListN = %i\n", distrIndexListN);
1628       return NULL;
1629     }
1630 
1631   newNode = dal_malloc_safe(sizeof(rhoTreeNode));
1632 
1633   /* compute bounding box for this node */
1634   if(rho_alt_1 != NULL)
1635     {
1636       if(get_distribution_box(&newNode->box,
1637 			      &rho_alt_1[distrIndexList[0]],
1638 			      targetRhoError) != 0)
1639 	return NULL;
1640     }
1641   else
1642     {
1643       if(get_shell_box(&newNode->box, &rho_alt_2[distrIndexList[0]]) != 0)
1644 	return NULL;
1645     }
1646   for(i = 1; i < distrIndexListN; i++)
1647     {
1648       if(rho_alt_1 != NULL)
1649 	{
1650 	  if(get_distribution_box(&tempBox,
1651 				  &rho_alt_1[distrIndexList[i]],
1652 				  targetRhoError) != 0)
1653 	    return NULL;
1654 	}
1655       else
1656 	{
1657 	  if(get_shell_box(&tempBox, &rho_alt_2[distrIndexList[i]]) != 0)
1658 	    return NULL;
1659 	}
1660       for(j = 0; j < NO_OF_DIMENSIONS; j++)
1661 	{
1662 	  if(tempBox.min[j] < newNode->box.min[j])
1663 	    newNode->box.min[j] = tempBox.min[j];
1664 	  if(tempBox.max[j] > newNode->box.max[j])
1665 	    newNode->box.max[j] = tempBox.max[j];
1666 	} /* END FOR j */
1667     } /* END FOR i */
1668 
1669   /* check if only one distr */
1670   if(distrIndexListN == 1)
1671     {
1672         /* OK, this becomes a leaf node */
1673       newNode->child1 = NULL;
1674       newNode->child2 = NULL;
1675       newNode->distrIndex = distrIndexList[0];
1676       return newNode;
1677     }
1678 
1679   /* There is more than one distribution */
1680   /* Get box that encloses all distributions */
1681   for(i = 0; i < NO_OF_DIMENSIONS; i++)
1682     {
1683       if(rho_alt_1 != NULL)
1684 	{
1685 	  tempBox.min[i] = rho_alt_1[distrIndexList[0]].centerCoords[i];
1686 	  tempBox.max[i] = rho_alt_1[distrIndexList[0]].centerCoords[i];
1687 	}
1688       else
1689 	{
1690 	  tempBox.min[i] = rho_alt_2[distrIndexList[0]].centerCoords[i];
1691 	  tempBox.max[i] = rho_alt_2[distrIndexList[0]].centerCoords[i];
1692 	}
1693     } /* END FOR i */
1694   for(i = 1; i < distrIndexListN; i++)
1695     {
1696       for(j = 0; j < NO_OF_DIMENSIONS; j++)
1697 	{
1698 	  if(rho_alt_1 != NULL)
1699 	    currCoord = rho_alt_1[distrIndexList[i]].centerCoords[j];
1700 	  else
1701 	    currCoord = rho_alt_2[distrIndexList[i]].centerCoords[j];
1702 	  if(tempBox.min[j] > currCoord) tempBox.min[j] = currCoord;
1703 	  if(tempBox.max[j] < currCoord) tempBox.max[j] = currCoord;
1704 	} /* END FOR j */
1705     } /* END FOR i */
1706 
1707   /* check if all distrs are at the same point */
1708 
1709   bestCoord = -1;
1710   maxDiff = 0;
1711   for(i = 0; i < NO_OF_DIMENSIONS; i++)
1712     {
1713       currDiff = tempBox.max[i] - tempBox.min[i];
1714       if(currDiff > maxDiff)
1715 	{
1716 	  bestCoord = i;
1717 	  maxDiff = currDiff;
1718 	}
1719     } /* END FOR i */
1720   if(bestCoord < 0)
1721     samePoint = 1;
1722   else
1723     {
1724       if(maxDiff > 1.0e-13)
1725 	{
1726 	  limit = (tempBox.max[bestCoord] + tempBox.min[bestCoord]) / 2;
1727 	  samePoint = 0;
1728 	}
1729       else
1730 	samePoint = 1;
1731     }
1732 
1733   if(samePoint == 1)
1734     {
1735         /* all distrs are at the same point */
1736         /* sort by extent */
1737         /* bubble sort (this could be optimized) */
1738       for(i = 0; i < (distrIndexListN-1); i++)
1739 	{
1740 	  for(j = 0; j < (distrIndexListN-1-i); j++)
1741 	    {
1742 	      if(rho_alt_1 != NULL)
1743 		{
1744 		  extent1 = rho_alt_1[distrIndexList[j]].extent;
1745 		  extent2 = rho_alt_1[distrIndexList[j+1]].extent;
1746 		}
1747 	      else
1748 		{
1749 		  extent1 = rho_alt_2[distrIndexList[j]].extent;
1750 		  extent2 = rho_alt_2[distrIndexList[j+1]].extent;
1751 		}
1752 	      if(extent1 > extent2)
1753 		{
1754                     /* do switch */
1755 		  tempInt = distrIndexList[j];
1756 		  distrIndexList[j] = distrIndexList[j+1];
1757 		  distrIndexList[j+1] = tempInt;
1758 		} /* END IF SWITCH */
1759 	    } /* END FOR j bubble sort */
1760 	} /* END FOR i bubble sort       */
1761       /* check sort */
1762       for(i = 0; i < (distrIndexListN-1); i++)
1763 	{
1764 	  if(rho_alt_1 != NULL)
1765 	    {
1766 	      extent1 = rho_alt_1[distrIndexList[i]].extent;
1767 	      extent2 = rho_alt_1[distrIndexList[i+1]].extent;
1768 	    }
1769 	  else
1770 	    {
1771 	      extent1 = rho_alt_2[distrIndexList[i]].extent;
1772 	      extent2 = rho_alt_2[distrIndexList[i+1]].extent;
1773 	    }
1774 	  if(extent1 > extent2)
1775 	    {
1776 	      do_output_2(0,
1777 			  "error in BuildRhoTreeBranch: list not sorted");
1778 	      return NULL;
1779 	    }
1780 	} /* END FOR i check sort */
1781 
1782       /* create 2 new boxes: small extent and large extent */
1783       n1 = distrIndexListN / 2;
1784       n2 = distrIndexListN - n1;
1785     }
1786   else
1787     {
1788         /* all distrs are NOT at the same point */
1789       limit = (tempBox.max[bestCoord] + tempBox.min[bestCoord]) / 2;
1790       tempList = dal_malloc_safe(distrIndexListN * sizeof(integer));
1791       n1 = 0;
1792       n2 = 0;
1793       for(i = 0; i < distrIndexListN; i++)
1794 	{
1795 	  if(rho_alt_1 != NULL)
1796 	    testCoord = rho_alt_1[distrIndexList[i]].centerCoords[bestCoord];
1797 	  else
1798 	    testCoord = rho_alt_2[distrIndexList[i]].centerCoords[bestCoord];
1799 	  if(testCoord > limit)
1800 	    {
1801 	      tempList[n1] = distrIndexList[i];
1802 	      n1++;
1803 	    }
1804 	  else
1805 	    {
1806 	      tempList[distrIndexListN-1-n2] = distrIndexList[i];
1807 	      n2++;
1808 	    }
1809 	} /* END FOR i */
1810       if((n1 == 0) || (n2 == 0))
1811 	{
1812 	  printf("error in BuildRhoTreeBranch (after split): "
1813 		 "n1 = %i, n2 = %i\n", n1, n2);
1814 	  printf("maxDiff = %.55f\n", maxDiff);
1815 	  return NULL;
1816 	}
1817 
1818       memcpy(distrIndexList, tempList, distrIndexListN * sizeof(integer));
1819       dal_free(tempList);
1820     }
1821   if((n1 == 0) || (n2 == 0))
1822     {
1823       printf("error in BuildRhoTreeBranch: n1 = %i, n2 = %i\n", n1, n2);
1824       return NULL;
1825     }
1826   child1 = BuildRhoTreeBranch(noOfDistributionsTot, rho_alt_1, rho_alt_2,
1827 			      n1, distrIndexList, targetRhoError);
1828   if(child1 == NULL)
1829     return NULL;
1830   child2 = BuildRhoTreeBranch(noOfDistributionsTot, rho_alt_1, rho_alt_2,
1831 			      n2, distrIndexList + n1, targetRhoError);
1832   if(child2 == NULL)
1833     return NULL;
1834   newNode->child1 = child1;
1835   newNode->child2 = child2;
1836   newNode->distrIndex = -1;
1837 
1838   return newNode;
1839 } /* END  */
1840 
1841 
1842 static rhoTreeNode*
BuildRhoTree(integer noOfDistributions,DistributionSpecStruct * rho_alt_1,ShellSpecStruct * rho_alt_2,real targetRhoError)1843 BuildRhoTree(integer noOfDistributions,
1844 	     DistributionSpecStruct* rho_alt_1,
1845 	     ShellSpecStruct* rho_alt_2,
1846 	     real targetRhoError)
1847 {
1848   rhoTreeNode* rootNode;
1849   integer* distrIndexList;
1850   integer i;
1851   real targetError, arg, r1;
1852   DistributionSpecStruct* distr;
1853 
1854   if(rho_alt_1 != NULL)
1855     {
1856         /* compute extent for each distribution in list */
1857       for(i = 0; i < noOfDistributions; i++)
1858 	{
1859 	  distr = &rho_alt_1[i];
1860 	  targetError = distr->coeff / 1e20;
1861 	  arg = distr->coeff / targetError;
1862 	  r1 = log(arg);
1863 	  if(r1 < 0) r1 *= -1;
1864 	  distr->extent = sqrt(r1 / distr->exponent);
1865 	} /* END FOR i */
1866     }
1867 
1868   /* set up initial index list: all distributions included */
1869   distrIndexList = dal_malloc_safe(noOfDistributions * sizeof(integer));
1870   for(i = 0; i < noOfDistributions; i++)
1871     distrIndexList[i] = i;
1872 
1873   rootNode = BuildRhoTreeBranch(noOfDistributions, rho_alt_1, rho_alt_2,
1874 				noOfDistributions, distrIndexList,
1875 				targetRhoError);
1876 
1877   if(rootNode == NULL)
1878     {
1879       do_output_2(0, "error in BuildRhoTreeBranch");
1880       return NULL;
1881     }
1882 
1883   dal_free(distrIndexList);
1884 
1885   return rootNode;
1886 } /* END BuildRhoTree */
1887 
1888 
1889 
1890 
1891 
1892 
free_rho_tree_memory(rhoTreeNode * rootNode)1893 static void free_rho_tree_memory(rhoTreeNode* rootNode)
1894 {
1895   rhoTreeNode* child1;
1896   rhoTreeNode* child2;
1897   child1 = rootNode->child1;
1898   child2 = rootNode->child2;
1899   if(child1 != NULL)
1900     free_rho_tree_memory(child1);
1901   if(child2 != NULL)
1902     free_rho_tree_memory(child2);
1903   dal_free(rootNode);
1904 } /* END free_rho_tree_memory */
1905 
1906 
round_real(real x)1907 static integer round_real(real x)
1908 {
1909   integer x1, x2;
1910   real err1, err2;
1911 
1912   x1 = (integer)x;
1913   x2 = x1 + 1;
1914   err1 = x - (real)x1;
1915   err2 = (real)x2 - x;
1916   if(err1 <= err2)
1917     return x1;
1918   else
1919     return x2;
1920 }
1921 
1922 
1923 
1924 void*
compute_grid_thread_func(void * arg)1925 compute_grid_thread_func(void* arg)
1926 {
1927     /*  char s[888]; */
1928   integer maxNoOfPoints;
1929   real (*coor)[3];
1930   real* weight;
1931   real* coorx;
1932   real* coory;
1933   real* coorz;
1934   integer noOfShells, noOfDistributions;
1935   integer noOfNonzeroShells;
1936   integer* nonZeroShellIndexList;
1937   integer noOfNonzeroDistributions;
1938   integer* nonZeroDistrIndexList;
1939   integer currShellNo, prevShellNo, tempInt;
1940   integer* listShlblocks;
1941   compute_grid_for_box_params_struct paramsStruct;
1942   real* workList;
1943   real totalIntegralResult;
1944   integer writeResultsToFile;
1945   DistributionSpecStruct* rhoForSubBox;
1946   integer noOfWrittenBatches, noOfGridPoints;
1947   BoxStruct startBox;
1948   BoxStruct subBox;
1949   DensitySpecStruct* density;
1950   compute_grid_thread_func_struct* inputParams;
1951   rhoTreeNode* rhoTreeRootNode;
1952   rhoTreeNode* rhoTreeRootNodeShells;
1953   integer* tempList;
1954   integer i, j, k, m, ii, jj, kk;
1955   integer Nx, Ny, Nz;
1956   integer nFunctions, count, nPoints, nblocks, blockStarted;
1957   integer startShellNo, NthisWrite;
1958   real Iexact, maxerror;
1959   FILE* gridFile;
1960   integer jobCount, assignedJobNumber;
1961   ShellSpecStruct* currShell;
1962 
1963   /* get hold of input params */
1964   inputParams = (compute_grid_thread_func_struct*)arg;
1965   density = inputParams->density;
1966   memcpy(&startBox, inputParams->startBox, sizeof(BoxStruct));
1967   rhoTreeRootNode = inputParams->rhoTreeRootNode;
1968   rhoTreeRootNodeShells = inputParams->rhoTreeRootNodeShells;
1969   noOfShells = density->noOfShells;
1970   noOfDistributions = density->noOfDistributions;
1971   Nx = inputParams->Nx;
1972   Ny = inputParams->Ny;
1973   Nz = inputParams->Nz;
1974   maxerror = inputParams->maxerror;
1975   gridFile = inputParams->gridFile;
1976 
1977   writeResultsToFile = 1;
1978 
1979   do_output_2(2, "thread %i entering compute_grid_thread_func..",
1980 	      inputParams->threadNo);
1981 
1982   /* allocate memory */
1983   maxNoOfPoints = FILE_BATCH_N;
1984   coor   = dal_malloc_safe(3 * maxNoOfPoints * sizeof(real));
1985   weight = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1986   coorx  = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1987   coory  = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1988   coorz  = dal_malloc_safe(maxNoOfPoints * sizeof(real));
1989   nonZeroShellIndexList = dal_malloc_safe(noOfShells * sizeof(integer));
1990   nonZeroDistrIndexList = dal_malloc_safe(density->nbast * sizeof(integer));
1991   workList = dal_malloc_safe(density->nbast *
1992 			     MAX_NO_OF_POINTS_PER_BATCH * sizeof(real));
1993   rhoForSubBox = dal_malloc_safe(noOfDistributions *
1994 				 sizeof(DistributionSpecStruct));
1995   tempList = dal_malloc_safe(noOfDistributions * sizeof(integer));
1996   listShlblocks = dal_malloc_safe(MAX_NO_OF_SHLBLOCKS * 2 * sizeof(integer));
1997 
1998 
1999   /* get initial assignedJobNumber */
2000 #ifdef USE_PTHREADS
2001   pthread_mutex_lock(inputParams->jobMutex);
2002 #endif
2003   assignedJobNumber = *inputParams->currJobNumber;
2004   *inputParams->currJobNumber += N_BATCH_JOBS;
2005 #ifdef USE_PTHREADS
2006   pthread_mutex_unlock(inputParams->jobMutex);
2007 #endif
2008   jobCount = 0;
2009   noOfWrittenBatches = 0;
2010   totalIntegralResult = 0;
2011   noOfGridPoints = 0;
2012   i = j = k = 0;
2013   for(i = 0; i < Nx; i++)
2014     {
2015       for(j = 0; j < Ny; j++)
2016 	{
2017 	  for(k = 0; k < Nz; k++)
2018 	    {
2019 	      jobCount++;
2020 	      if(jobCount >= (assignedJobNumber + N_BATCH_JOBS))
2021 		{
2022                     /* get new assignedJobNumber */
2023 #ifdef USE_PTHREADS
2024 		  pthread_mutex_lock(inputParams->jobMutex);
2025 #endif
2026 		  assignedJobNumber = *inputParams->currJobNumber;
2027 		  *inputParams->currJobNumber += N_BATCH_JOBS;
2028 #ifdef USE_PTHREADS
2029 		  pthread_mutex_unlock(inputParams->jobMutex);
2030 #endif
2031 		}
2032 	      if(jobCount < assignedJobNumber)
2033 		continue;
2034 	      if(assignedJobNumber > (Nx*Ny*Nz))
2035 		continue;
2036 	      /*printf("i = %i, j = %i, k = %i\n", i, j, k); */
2037 
2038 	      /* determine current sub-box */
2039 	      subBox.min[0] = startBox.min[0] + (real)(i + 0) *
2040 		(startBox.max[0] - startBox.min[0]) / Nx;
2041 	      subBox.max[0] = startBox.min[0] + (real)(i + 1) *
2042 		(startBox.max[0] - startBox.min[0]) / Nx;
2043 	      subBox.min[1] = startBox.min[1] + (real)(j + 0) *
2044 		(startBox.max[1] - startBox.min[1]) / Ny;
2045 	      subBox.max[1] = startBox.min[1] + (real)(j + 1) *
2046 		(startBox.max[1] - startBox.min[1]) / Ny;
2047 	      subBox.min[2] = startBox.min[2] + (real)(k + 0) *
2048 		(startBox.max[2] - startBox.min[2]) / Nz;
2049 	      subBox.max[2] = startBox.min[2] + (real)(k + 1) *
2050 		(startBox.max[2] - startBox.min[2]) / Nz;
2051 
2052 	      /* get list of non-zero shells for current sub-box */
2053 	      noOfNonzeroShells = get_distrs_for_box(nonZeroShellIndexList,
2054 						     rhoTreeRootNodeShells,
2055 						     &subBox);
2056 	      if(noOfNonzeroShells < 0)
2057 		{
2058 		  do_output_2(0, "error in get_distrs_for_box");
2059 		  return NULL;
2060 		}
2061 	      if(noOfNonzeroShells == 0)
2062 		continue;
2063 
2064 	      /* sort list of non-zero shells (bubble sort, could be optimized) */
2065 	      for(kk = 0; kk < (noOfNonzeroShells - 1); kk++)
2066 		{
2067 		  for(jj = 0; jj < (noOfNonzeroShells - 1 - kk); jj++)
2068 		    {
2069 		      if(nonZeroShellIndexList[jj] >
2070 			 nonZeroShellIndexList[jj+1])
2071 			{
2072 			  tempInt = nonZeroShellIndexList[jj];
2073  			  nonZeroShellIndexList[jj] =
2074 			    nonZeroShellIndexList[jj+1];
2075 	 		  nonZeroShellIndexList[jj+1] = tempInt;
2076 		 	}
2077                     } /* END FOR jj */
2078 	 	} /* END FOR kk */
2079 
2080 	      /* translate list of nonzero shells to list of  */
2081 	      /* nonzero contracted distributions */
2082 	      noOfNonzeroDistributions = 0;
2083 	      for(kk = 0; kk < noOfNonzeroShells; kk++)
2084 		{
2085 		  currShell = &density->shellList[nonZeroShellIndexList[kk]];
2086 		  nFunctions = 1 + 2 *
2087 		    currShell->shellType;
2088 		  for(ii = 0; ii < nFunctions; ii++)
2089 		    {
2090 		      nonZeroDistrIndexList[noOfNonzeroDistributions] =
2091 			currShell->startIndexInMatrix + ii;
2092 		      noOfNonzeroDistributions++;
2093 		    } /* END FOR ii      */
2094 		} /* END FOR kk */
2095 	      if(noOfNonzeroDistributions > density->nbast)
2096 		{
2097 		  do_output_2(0, "error: (noOfNonzeroDistributions > nbast)");
2098 		  return NULL;
2099 		}
2100 
2101 	      /* get list of relevant distributions for sub-box */
2102 	      count = get_distrs_for_box(tempList, rhoTreeRootNode, &subBox);
2103 	      if(count < 0)
2104 		{
2105 		  do_output_2(0, "error in get_distrs_for_box");
2106 		  return NULL;
2107 		}
2108 	      if(count == 0)
2109 		continue;
2110 
2111 	      for(m = 0; m < count; m++)
2112 		memcpy(&rhoForSubBox[m],
2113 		       &density->distrList[tempList[m]],
2114 		       sizeof(DistributionSpecStruct));
2115 
2116 	      Iexact = 0;
2117 	      for(kk = 0; kk < count; kk++)
2118 		Iexact +=
2119 		  compute_integral_over_box(&rhoForSubBox[kk], &subBox);
2120 
2121 	      /* create grid for sub-box */
2122 
2123 	      memcpy(&paramsStruct.density,
2124 		     density,
2125 		     sizeof(DensitySpecStruct));
2126 	      paramsStruct.density.noOfDistributions = count;
2127 	      paramsStruct.density.distrList = rhoForSubBox;
2128 
2129 	      paramsStruct.maxerrorPerBox = maxerror;
2130 	      paramsStruct.nonZeroDistrIndexList = nonZeroDistrIndexList;
2131 	      paramsStruct.noOfNonzeroDistributions = noOfNonzeroDistributions;
2132 	      paramsStruct.nonZeroShellsIndexList = nonZeroShellIndexList;
2133 	      paramsStruct.noOfNonzeroShells = noOfNonzeroShells;
2134 
2135 	      nPoints = compute_grid_for_box(&paramsStruct,
2136 					     maxNoOfPoints,
2137 					     &coor[0],
2138 					     &weight[0],
2139 					     &subBox,
2140 					     Iexact,
2141 					     workList,
2142 					     &totalIntegralResult);
2143 	      if(nPoints < 0)
2144 		{
2145 		  do_output_2(0, "error in compute_grid_for_box");
2146 		  return NULL;
2147 		}
2148 
2149 	      if(nPoints == 0)
2150 		continue;
2151 
2152 	      noOfGridPoints += nPoints;
2153 
2154 	      if(writeResultsToFile == 1)
2155 		{
2156                     integer nPointsLeft;
2157                     /* make block-list of non-zero shells to write to file */
2158 		  nblocks = 0;
2159 		  blockStarted = 0;
2160 		  startShellNo = -1;
2161 		  prevShellNo = -1;
2162 		  for(kk = 0; kk < noOfNonzeroShells; kk++)
2163 		    {
2164 		      currShellNo = nonZeroShellIndexList[kk];
2165 		      if(blockStarted == 0)
2166 			{
2167 			  blockStarted = 1;
2168 			  startShellNo = currShellNo;
2169 			}
2170 		      else
2171 			{
2172 			  if(currShellNo != (prevShellNo + 1))
2173 			    {
2174                                 /* register previous block */
2175 			      listShlblocks[nblocks*2] = startShellNo+1;
2176 			      listShlblocks[nblocks*2+1] = prevShellNo+1;
2177 			      nblocks++;
2178 			      startShellNo = currShellNo;
2179 			    }
2180 			}
2181 		      prevShellNo = currShellNo;
2182 		    } /* END FOR kk */
2183 		  if(blockStarted == 1)
2184 		    {
2185                         /* register previous block */
2186 		      listShlblocks[nblocks*2] = startShellNo+1;
2187 		      listShlblocks[nblocks*2+1] = prevShellNo+1;
2188 		      nblocks++;
2189 		    }
2190 
2191 		  /* set up separate x, y, z vectors for writing to file */
2192 		  for(kk = 0; kk < nPoints; kk++)
2193 		    {
2194 		      coorx[kk] = coor[kk][0];
2195 		      coory[kk] = coor[kk][1];
2196 		      coorz[kk] = coor[kk][2];
2197 		    }
2198 
2199 		  /* write grid points to file */
2200 		   nPointsLeft = nPoints;
2201 #ifdef USE_PTHREADS
2202 		  pthread_mutex_lock(inputParams->fileMutex);
2203 #endif
2204 		  while(nPointsLeft > 0)
2205 		    {
2206 		      if(nPointsLeft <= MAX_NO_OF_POINTS_PER_WRITE)
2207 			NthisWrite = nPointsLeft;
2208 		      else
2209 			NthisWrite = MAX_NO_OF_POINTS_PER_WRITE;
2210 		      fwrite(&NthisWrite, sizeof(integer), 1, gridFile);
2211 		      fwrite(&nblocks, sizeof(integer), 1, gridFile);
2212 		      fwrite(listShlblocks, sizeof(integer), 2*nblocks, gridFile);
2213 #if 1
2214 		      fwrite(&(coor[nPoints-nPointsLeft][0]),
2215 			     sizeof(real), 3*NthisWrite, gridFile);
2216 #else
2217 		      fwrite(&coorx[nPoints-nPointsLeft],
2218 			     sizeof(real), NthisWrite, gridFile);
2219 		      fwrite(&coory[nPoints-nPointsLeft],
2220 			     sizeof(real), NthisWrite, gridFile);
2221 		      fwrite(&coorz[nPoints-nPointsLeft],
2222 			     sizeof(real), NthisWrite, gridFile);
2223 #endif
2224 		      fwrite(&weight[nPoints-nPointsLeft],
2225 			     sizeof(real), NthisWrite, gridFile);
2226 		      nPointsLeft -= NthisWrite;
2227 		      noOfWrittenBatches++;
2228 		    } /* END WHILE points left */
2229 #ifdef USE_PTHREADS
2230 		  pthread_mutex_unlock(inputParams->fileMutex);
2231 #endif
2232 		} /* END IF (gridFile != NULL) */
2233 	    } /* END FOR k */
2234 	} /* END FOR j */
2235     } /* END FOR i */
2236 
2237 
2238   do_output_2(2, "thread %i loops done, freeing memory..",
2239 	      inputParams->threadNo);
2240 
2241   /* free memory */
2242   dal_free(coor);
2243   dal_free(weight);
2244   dal_free(coorx);
2245   dal_free(coory);
2246   dal_free(coorz);
2247   dal_free(nonZeroShellIndexList);
2248   dal_free(nonZeroDistrIndexList);
2249   dal_free(workList);
2250   dal_free(rhoForSubBox);
2251   dal_free(tempList);
2252   dal_free(listShlblocks);
2253 
2254   do_output_2(2, "thread %i mem freed OK, setting result params..",
2255 	      inputParams->threadNo);
2256 
2257   /* report results through input structure */
2258   inputParams->noOfPoints = noOfGridPoints;
2259   inputParams->noOfWrittenBatches = noOfWrittenBatches;
2260   inputParams->integralResult = totalIntegralResult;
2261 
2262   do_output_2(2, "thread %i exiting compute_grid_thread_func",
2263 	      inputParams->threadNo);
2264 
2265   return NULL;
2266 } /* END compute_grid_thread_func */
2267 
2268 
2269 
2270 int
do_test_integration(DensitySpecStruct * density,char * gridFileName)2271 do_test_integration(DensitySpecStruct* density, char* gridFileName)
2272 {
2273   real (*coor)[3];
2274   real* weight;
2275   real* coorx;
2276   real* coory;
2277   real* coorz;
2278   integer noOfNonzeroShells;
2279   integer* nonZeroShellIndexList;
2280   integer noOfNonzeroDistributions;
2281   integer* nonZeroDistrIndexList;
2282   integer finished, currIndex;
2283   integer* listShlblocks;
2284   real* workList;
2285   integer nRepeats, repeatNo;
2286   real testIntegralResult;
2287   integer maxNoOfPoints, noOfShells, noOfDistributions;
2288   /*  char s[888]; */
2289   time_t startSeconds2, endSeconds2;
2290   clock_t startClock, endClock;
2291   FILE* gridFile;
2292   integer nPoints, nblocks, ii, kk, nFunctions;
2293   integer nPointsDone, nPointsThisTime, nBatches;
2294   real secondsTakenReal;
2295 
2296 
2297   noOfShells = density->noOfShells;
2298   if(noOfShells <= 0)
2299     return -1;
2300 
2301   noOfDistributions = density->noOfDistributions;
2302   if(noOfDistributions <= 0)
2303     return -1;
2304 
2305   /* allocate memory */
2306   maxNoOfPoints = FILE_BATCH_N;
2307   coor   = dal_malloc_safe(3 * maxNoOfPoints * sizeof(real));
2308   weight = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2309   coorx  = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2310   coory  = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2311   coorz  = dal_malloc_safe(maxNoOfPoints * sizeof(real));
2312   nonZeroShellIndexList = dal_malloc_safe(noOfShells * sizeof(integer));
2313   nonZeroDistrIndexList = dal_malloc_safe(density->nbast * sizeof(integer));
2314   workList = dal_malloc_safe(density->nbast *
2315 			     MAX_NO_OF_POINTS_PER_BATCH * sizeof(real));
2316   listShlblocks = dal_malloc_safe(MAX_NO_OF_SHLBLOCKS * 2 * sizeof(integer));
2317 
2318 
2319   nRepeats = 1;
2320   do_output_2(2, "grid created OK, computing integral to check time, "
2321 	      "nRepeats = %i", nRepeats);
2322 
2323   time(&startSeconds2);
2324 
2325   for(repeatNo = 0; repeatNo < nRepeats; repeatNo++)
2326     {
2327       testIntegralResult = 0;
2328       startClock = clock();
2329       gridFile = fopen(gridFileName, "rb");
2330       if(gridFile == NULL)
2331 	{
2332 	  do_output_2(0, "error opening grid file for reading");
2333 	  return -1;
2334 	}
2335       finished = 0;
2336       nBatches = 0;
2337       while(finished == 0)
2338 	{
2339 	  if(fread(&nPoints, sizeof(integer), 1, gridFile) != 1)
2340 	    finished = 1;
2341 	  else
2342 	    {
2343 	      if(fread(&nblocks, sizeof(integer), 1, gridFile) != 1)
2344 		{
2345 		  do_output_2(0, "error reading nblocks");
2346 		  return -1;
2347 		}
2348 	      if(fread(listShlblocks, sizeof(integer), 2*nblocks, gridFile)
2349 		 != (2*nblocks))
2350 		{
2351 		  do_output_2(0, "error reading blocks");
2352 		  return -1;
2353 		}
2354 	      if(fread(coor, sizeof(real), 3*nPoints, gridFile) != 3*nPoints)
2355 		return -1;
2356 	      /*
2357 	      if(fread(coorx, sizeof(real), nPoints, gridFile) != nPoints)
2358 		return -1;
2359 	      if(fread(coory, sizeof(real), nPoints, gridFile) != nPoints)
2360 		return -1;
2361 	      if(fread(coorz, sizeof(real), nPoints, gridFile) != nPoints)
2362 		return -1;
2363 	      for(kk = 0; kk < nPoints; kk++)
2364 		{
2365 		  coor[kk][0] = coorx[kk];
2366 		  coor[kk][1] = coory[kk];
2367 		  coor[kk][2] = coorz[kk];
2368 		}
2369 	      */
2370 	      if(fread(&weight[0], sizeof(real), nPoints, gridFile) != nPoints)
2371 		return -1;
2372 
2373 
2374 	      noOfNonzeroShells = 0;
2375 	      for(kk = 0; kk < nblocks; kk++)
2376 		{
2377 		  currIndex = listShlblocks[kk*2];
2378 		  while(currIndex <= listShlblocks[kk*2+1])
2379 		    {
2380 		      nonZeroShellIndexList[noOfNonzeroShells] = currIndex-1;
2381 		      noOfNonzeroShells++;
2382 		      currIndex++;
2383 		    } /* END WHILE */
2384 		} /* END FOR kk */
2385 
2386 	      /* translate list of nonzero shells to list of nonzero
2387                * contracted distributions */
2388 	      noOfNonzeroDistributions = 0;
2389 	      for(kk = 0; kk < noOfNonzeroShells; kk++)
2390 		{
2391 		  nFunctions = 1 + 2 * density->shellList[nonZeroShellIndexList[kk]].shellType;
2392 		  for(ii = 0; ii < nFunctions; ii++)
2393 		    {
2394 		      nonZeroDistrIndexList[noOfNonzeroDistributions] =
2395 			density->shellList[nonZeroShellIndexList[kk]].startIndexInMatrix + ii;
2396 		      noOfNonzeroDistributions++;
2397 		    } /* END FOR ii */
2398 		} /* END FOR kk */
2399 
2400 	      nPointsDone = 0;
2401 	      while(nPointsDone < nPoints)
2402 		{
2403 		  if((nPoints - nPointsDone) > MAX_NO_OF_POINTS_PER_BATCH)
2404 		    nPointsThisTime = MAX_NO_OF_POINTS_PER_BATCH;
2405 		  else
2406 		    nPointsThisTime = nPoints - nPointsDone;
2407 
2408 		  testIntegralResult +=
2409 		    compute_integral_from_points(density,
2410 						 noOfNonzeroShells,
2411 						 nonZeroShellIndexList,
2412 						 noOfNonzeroDistributions,
2413 						 nonZeroDistrIndexList,
2414 						 nPointsThisTime,
2415 						 &coor[nPointsDone],
2416 						 &weight[nPointsDone],
2417 						 workList);
2418 		  nPointsDone += nPointsThisTime;
2419 		} /* END WHILE (nPointsDone < nPoints) */
2420 
2421 #if 0
2422 		  for(kk = 0; kk < nPoints; kk++)
2423 		    {
2424 		      testIntegralResult +=
2425 			compute_value_at_point_2(nonZeroShellIndexList,
2426 						 noOfNonzeroShells,
2427 						 shellList2,
2428 						 noOfShells,
2429 						 nbast,
2430 						 dmat,
2431 						 &coor[kk],
2432 						 nonZeroDistrIndexList,
2433 						 noOfNonzeroDistributions)
2434 			* weight[kk];
2435 		    } /* END FOR kk */
2436 #endif
2437 
2438 		  nBatches++;
2439 	    } /* END ELSE nPoints read OK */
2440 	} /* END WHILE more in file */
2441       endClock = clock();
2442       secondsTakenReal = (real)(endClock - startClock) / CLOCKS_PER_SEC;
2443       do_output_2(2, "integral computed, result: %.11f, "
2444 		  "took %.3f s (time value may overflow for > 1000 s)",
2445 		  testIntegralResult, secondsTakenReal);
2446       do_output_2(2, "nBatches from test integration: %i", nBatches);
2447       fclose(gridFile);
2448     } /* END FOR repeatNo */
2449 
2450   time(&endSeconds2);
2451   do_output_2(2, "time for test integration: %i s",
2452 	      (integer)(endSeconds2 - startSeconds2));
2453 
2454   /* free memory */
2455   dal_free(coor);
2456   dal_free(weight);
2457   dal_free(workList);
2458   dal_free(coorx);
2459   dal_free(coory);
2460   dal_free(coorz);
2461   dal_free(nonZeroShellIndexList);
2462   dal_free(nonZeroDistrIndexList);
2463   dal_free(listShlblocks);
2464 
2465   return 0;
2466 } /* END do_test_integration */
2467 
2468 
compute_grid(DensitySpecStruct * density,real maxerror,real boxdist,real targetRhoError,char * gridFileName,integer noOfThreads,integer doTestIntegration)2469 integer compute_grid(
2470 		 DensitySpecStruct* density,
2471 		 real maxerror,
2472 		 real boxdist,
2473 		 real targetRhoError,
2474 		 char* gridFileName,
2475 		 integer noOfThreads,
2476 		 integer doTestIntegration
2477 		 )
2478 {
2479   FILE* gridFile;
2480   char ss[888];
2481   char sss[888];
2482   integer noOfGridPoints;
2483   BoxStruct startBox;
2484   BoxStruct tempBox;
2485   integer i, j;
2486   rhoTreeNode* rhoTreeRootNode;
2487   rhoTreeNode* rhoTreeRootNodeShells;
2488   clock_t startTime, timeTaken;
2489   time_t startSeconds, endSeconds;
2490   real Iexact, absRelError;
2491   integer Nxyz[3]; /* Nx Ny Nz */
2492   integer Nx, Ny, Nz;
2493   integer IexactInteger;
2494   integer correctValueInt;
2495   real correctValue, abserrorRel;
2496   integer noOfWrittenBatches;
2497   real totalIntegralResult;
2498   integer noOfDistributions, writeResultsToFile;
2499   integer currJobNumber, noOfShells;
2500   real megaBytes;
2501   compute_grid_thread_func_struct* threadParamsList;
2502 
2503   do_output_2(2, "entering compute_grid..\n");
2504 
2505   noOfShells = density->noOfShells;
2506   if(noOfShells <= 0)
2507     return -1;
2508 
2509   noOfDistributions = density->noOfDistributions;
2510   if(noOfDistributions <= 0)
2511     return -1;
2512 
2513 
2514   make_float_string(ss, maxerror);
2515   make_float_string(sss, targetRhoError);
2516   do_output_2(2, "Entering compute_grid, noOfDistributions = %i, "
2517 	      "maxerror = %s, targetRhoError = %s",
2518 	      noOfDistributions, ss, sss);
2519 
2520   time(&startSeconds);
2521   do_output_2(2, "compute_grid start time: %s", ctime(&startSeconds));
2522 
2523   megaBytes = (real)(density->nbast * density->nbast * sizeof(real)) / 1000000;
2524   do_output_2(2, "nbast = %i, dmat uses %.1f Mb", density->nbast, megaBytes);
2525 
2526   /* set up starting box */
2527   if(get_distribution_box(&startBox,
2528 			  &density->distrList[0],
2529 			  targetRhoError) != 0)
2530     {
2531       do_output_2(0, "error in get_distribution_box");
2532       return -1;
2533     }
2534   for(i = 1; i < noOfDistributions; i++)
2535     {
2536       if(get_distribution_box(&tempBox,
2537 			      &density->distrList[i],
2538 			      targetRhoError) != 0)
2539 	{
2540 	  do_output_2(0, "error in get_distribution_box");
2541 	  return -1;
2542 	}
2543       for(j = 0; j < NO_OF_DIMENSIONS; j++)
2544 	{
2545 	  if(tempBox.min[j] < startBox.min[j])
2546 	    startBox.min[j] = tempBox.min[j];
2547 	  if(tempBox.max[j] > startBox.max[j])
2548 	    startBox.max[j] = tempBox.max[j];
2549 	} /* END FOR j */
2550     } /* END FOR i */
2551 
2552   do_output_2(2, "compute_grid starting box:");
2553   print_box(&startBox, 2);
2554 
2555   Iexact = 0;
2556   for(i = 0; i < noOfDistributions; i++)
2557     Iexact += compute_integral_over_box(&density->distrList[i], &startBox);
2558   do_output_2(2, "Analytical integral over starting box: %.22f", Iexact);
2559   IexactInteger = round_real(Iexact);
2560   absRelError = fabs((double)IexactInteger - Iexact) / (double)IexactInteger;
2561   make_float_string(ss, absRelError);
2562   do_output_2(2, "Assuming that the correct value is %i, "
2563 	      "the relative error is %s", IexactInteger, ss);
2564 
2565   do_output_2(2, "calling BuildRhoTree...");
2566   startTime = clock();
2567   rhoTreeRootNode = BuildRhoTree(noOfDistributions,
2568 				 density->distrList,
2569 				 NULL,
2570 				 targetRhoError);
2571   if(rhoTreeRootNode == NULL)
2572     {
2573       do_output_2(0, "error in BuildRhoTree\n");
2574       return -1;
2575     }
2576   timeTaken = clock() - startTime;
2577   do_output_2(2, "BuildRhoTree returned OK, time taken: %.2f s",
2578 	      ((double)timeTaken)/CLOCKS_PER_SEC);
2579 
2580   do_output_2(2, "calling BuildRhoTree for shells...");
2581   startTime = clock();
2582   rhoTreeRootNodeShells = BuildRhoTree(noOfShells,
2583 				       NULL,
2584 				       density->shellList,
2585 				       targetRhoError);
2586   if(rhoTreeRootNodeShells == NULL)
2587     {
2588       do_output_2(0, "error in BuildRhoTree\n");
2589       return -1;
2590     }
2591   timeTaken = clock() - startTime;
2592   do_output_2(2, "BuildRhoTree for shells returned OK, time taken: %.2f s",
2593 	      ((double)timeTaken)/CLOCKS_PER_SEC);
2594 
2595   /* compute Nx Ny Nz */
2596   for(i = 0; i < 3; i++)
2597       Nxyz[i] = 1 + (integer)((startBox.max[i] - startBox.min[i]) / boxdist);
2598   Nx = Nxyz[0];
2599   Ny = Nxyz[1];
2600   Nz = Nxyz[2];
2601   do_output_2(2, "boxdist = %f, Nx = %i, Ny = %i, Nz = %i, Ntot = %i",
2602 	      boxdist, Nx, Ny, Nz, Nx*Ny*Nz);
2603 
2604   /* create grid file */
2605   gridFile = fopen(gridFileName, "wb");
2606   if(gridFile == NULL)
2607     {
2608       do_output_2(0, "error opening grid file '%s' for writing", gridFileName);
2609       return -1;
2610     }
2611 
2612   writeResultsToFile = 1;
2613 
2614 
2615   /* up to this point there is no parallellization */
2616   /* this is where we start to think about threading */
2617 
2618 #ifdef USE_PTHREADS
2619   pthread_mutex_t fileMutex = PTHREAD_MUTEX_INITIALIZER;
2620   pthread_mutex_t jobMutex = PTHREAD_MUTEX_INITIALIZER;
2621 #endif
2622 
2623   threadParamsList = dal_malloc_safe(noOfThreads *
2624 				     sizeof(compute_grid_thread_func_struct));
2625 
2626   currJobNumber = 1;
2627 
2628   for(i = 0; i < noOfThreads; i++)
2629     {
2630       threadParamsList[i].density = density;
2631       threadParamsList[i].rhoTreeRootNode = rhoTreeRootNode;
2632       threadParamsList[i].rhoTreeRootNodeShells = rhoTreeRootNodeShells;
2633       threadParamsList[i].maxerror = maxerror;
2634       threadParamsList[i].gridFile = gridFile;
2635       threadParamsList[i].startBox = &startBox;
2636       threadParamsList[i].Nx = Nx;
2637       threadParamsList[i].Ny = Ny;
2638       threadParamsList[i].Nz = Nz;
2639 #ifdef USE_PTHREADS
2640       threadParamsList[i].fileMutex = &fileMutex;
2641       threadParamsList[i].jobMutex = &jobMutex;
2642 #endif
2643       threadParamsList[i].currJobNumber = &currJobNumber;
2644       threadParamsList[i].noOfPoints = -1;
2645       threadParamsList[i].noOfWrittenBatches = 0;
2646       threadParamsList[i].integralResult = 0;
2647       threadParamsList[i].threadNo = i;
2648     } /* END FOR i */
2649 
2650 
2651 #ifndef USE_PTHREADS
2652   do_output_2(2, "USE_PTHREADS not set, no threads created");
2653   if(noOfThreads != 1)
2654     {
2655       do_output_2(0, "error: cannot skip threads when (noOfThreads != 1)");
2656       return -1;
2657     }
2658   compute_grid_thread_func(&threadParamsList[0]);
2659 #else
2660   do_output_2(2, "Starting %i threads.", noOfThreads);
2661 
2662   /* start threads */
2663   for(i = 0; i < noOfThreads; i++)
2664     {
2665       if(pthread_create(&threadParamsList[i].thread,
2666 			NULL,
2667 			compute_grid_thread_func,
2668 			&threadParamsList[i]) != 0)
2669 	{
2670 	  do_output_2(0, "Error in pthread_create for thread %i", i);
2671 	  do_output_2(0, "waiting for already created threads..");
2672 	  for(j = 0; j < i; j++)
2673 	    {
2674 	      if(pthread_join(threadParamsList[j].thread, NULL) != 0)
2675 		{
2676 		  do_output_2(0, "Error in pthread_join for thread %i", j);
2677 		}
2678 	    } /* END FOR j */
2679 	  do_output_2(0, "all threads finished, returning error code");
2680 	  return -1;
2681 	}
2682     } /* END FOR i */
2683 
2684   do_output_2(2, "%i threads started OK.", noOfThreads);
2685 
2686   /* wait for threads to finish */
2687   for(i = 0; i < noOfThreads; i++)
2688     {
2689       if(pthread_join(threadParamsList[i].thread, NULL) != 0)
2690 	{
2691 	  do_output_2(0, "Error in pthread_join for thread %i", i);
2692 	}
2693     } /* END FOR i */
2694 
2695   do_output_2(2, "all %i threads have finished:", noOfThreads);
2696   for(i = 0; i < noOfThreads; i++)
2697     do_output_2(2, "thread %2i noOfWrittenBatches = %6i",
2698 		i, threadParamsList[i].noOfWrittenBatches);
2699 #endif
2700 
2701   /* now all threads have finished, check for errors */
2702   for(i = 0; i < noOfThreads; i++)
2703     {
2704       if(threadParamsList[i].noOfPoints < 0)
2705 	{
2706 	  do_output_2(0, "error in compute_grid_thread_func"
2707 		      " for thread %i\n", i);
2708 	  return -1;
2709 	}
2710     } /* END FOR i */
2711 
2712 
2713 
2714   noOfGridPoints = 0;
2715   noOfWrittenBatches = 0;
2716   totalIntegralResult = 0;
2717   for(i = 0; i < noOfThreads; i++)
2718     {
2719       noOfGridPoints += threadParamsList[i].noOfPoints;
2720       noOfWrittenBatches += threadParamsList[i].noOfWrittenBatches;
2721       totalIntegralResult += threadParamsList[i].integralResult;
2722     } /* END FOR i */
2723 
2724   fclose(gridFile);
2725 
2726   do_output_2(2, "noOfWrittenBatches = %i", noOfWrittenBatches);
2727 
2728   time(&endSeconds);
2729   do_output_2(1, "compute_grid ending OK, noOfGridPoints = %i, took %i s",
2730 	      noOfGridPoints, (integer)(endSeconds - startSeconds));
2731   do_output_2(2, "compute_grid totalIntegralResult = %.11f",
2732 	      totalIntegralResult);
2733   correctValueInt = round_real(totalIntegralResult);
2734   correctValue = correctValueInt;
2735   abserrorRel = fabs(totalIntegralResult - correctValue) / correctValue;
2736   make_float_string(ss, abserrorRel);
2737   do_output_2(2, "Assuming that the correct value is %i,\n"
2738 	      "the absolute relative error is %s", correctValueInt, ss);
2739 
2740   if(doTestIntegration == 1)
2741     {
2742       if(do_test_integration(density, gridFileName) != 0)
2743 	{
2744 	  do_output_2(0, "error in do_test_integration");
2745 	  return -1;
2746 	}
2747     } /* END IF */
2748 
2749 
2750   free_rho_tree_memory(rhoTreeRootNode);
2751   free_rho_tree_memory(rhoTreeRootNodeShells);
2752 
2753   dal_free(threadParamsList);
2754 
2755   time(&endSeconds);
2756   do_output_2(2, "compute_grid finish time: %s", ctime(&endSeconds));
2757 
2758   return noOfGridPoints;
2759 } /* END compute_grid */
2760 
2761 
2762 static integer
do_merge_sort_distrs(integer n,DistributionSpecStruct * list,DistributionSpecStruct * workList)2763 do_merge_sort_distrs(integer n,
2764 		     DistributionSpecStruct* list,
2765 		     DistributionSpecStruct* workList)
2766 {
2767     /* merge sort:  */
2768     /* first sort the first half, */
2769     /* then sort the second half, */
2770     /* then merge results to form final sorted list. */
2771   integer n1, n2, nn, decision, i1, i2, i;
2772   DistributionSpecStruct* d1;
2773   DistributionSpecStruct* d2;
2774 
2775   if(n < 1)
2776     {
2777       do_output_2(0, "(n < 1)");
2778       return -1;
2779     }
2780   if(n == 1)
2781     return 0;
2782 
2783   n1 = n / 2;
2784   n2 = n - n1;
2785 
2786   /* sort first half */
2787   if(do_merge_sort_distrs(n1, list, workList) != 0)
2788     return -1;
2789 
2790   /* sort second half */
2791   if(do_merge_sort_distrs(n2, &list[n1], workList) != 0)
2792     return -1;
2793 
2794   /* merge results */
2795   nn = 0;
2796   i1 = 0;
2797   i2 = 0;
2798   while(nn < n)
2799     {
2800       if((i1 < n1) && (i2 < n2))
2801 	{
2802             /* compare */
2803 	  d1 = &list[i1];
2804 	  d2 = &list[n1+i2];
2805 	  decision = 0;
2806 	  for(i = 0; i < 3; i++)
2807 	    {
2808 	      if(decision == 0)
2809 		{
2810 		  if(d1->monomialInts[i] != d2->monomialInts[i])
2811 		    {
2812 		      if(d1->monomialInts[i] > d2->monomialInts[i])
2813 			decision = 1;
2814 		      else
2815 			decision = 2;
2816 		    }
2817 		} /* END IF (decision == 0) */
2818 	    } /* END FOR i */
2819 	  if(decision == 0)
2820 	    {
2821                 /* check exponents */
2822 	      if(d1->exponent > d2->exponent)
2823 		decision = 1;
2824 	      else
2825 		decision = 2;
2826 	    }
2827 	}
2828       else
2829 	{
2830 	  if(i1 == n1)
2831 	      decision = 2;
2832 	  else
2833 	      decision = 1;
2834 	}
2835       if(decision <= 0)
2836 	{
2837 	  do_output_2(0, "(decision <= 0)");
2838 	  return -1;
2839 	}
2840       if(decision == 1)
2841 	{
2842 	  memcpy(&workList[nn], &list[i1], sizeof(DistributionSpecStruct));
2843 	  i1++;
2844 	}
2845       else
2846 	{
2847 	  memcpy(&workList[nn], &list[n1+i2], sizeof(DistributionSpecStruct));
2848 	  i2++;
2849 	}
2850       nn++;
2851     } /* END WHILE (nn < n) */
2852   if(i1 != n1)
2853     {
2854       do_output_2(0, "(i1 != n1)");
2855       return -1;
2856     }
2857   if(i2 != n2)
2858     {
2859       do_output_2(0, "(i2 != n2)");
2860       return -1;
2861     }
2862   if(nn != n)
2863     {
2864       do_output_2(0, "(nn != n)");
2865       return -1;
2866     }
2867   memcpy(list, workList, n * sizeof(DistributionSpecStruct));
2868   return 0;
2869 } /* END do_merge_sort_distrs */
2870 
2871 
2872 
2873 static integer
compute_extent_for_shells(BasisInfoStruct * basisInfo,real targetRhoError)2874 compute_extent_for_shells(BasisInfoStruct* basisInfo, real targetRhoError)
2875 {
2876   integer i;
2877   for(i = 0; i < basisInfo->noOfShells; i++)
2878     {
2879       ShellSpecStruct* currShell = &basisInfo->shellList[i];
2880       integer contr = currShell->noOfContr;
2881       integer worstIndex = -1;
2882       real largestExtent = 0;
2883       integer kk;
2884       for(kk = 0; kk < contr; kk++)
2885 	{
2886 	  DistributionSpecStruct testDistr;
2887 	  BoxStruct testBox;
2888 	  integer j;
2889 	  real currExtent;
2890 
2891 	  testDistr.coeff = currShell->coeffList[kk];
2892 	  testDistr.exponent = currShell->exponentList[kk];
2893 	  for(j = 0; j < 3; j++)
2894 	    testDistr.centerCoords[j] = 0;
2895 	  for(j = 0; j < 3; j++)
2896 	    testDistr.monomialInts[j] = 0;
2897 	  get_distribution_box(&testBox, &testDistr, targetRhoError);
2898           currExtent = (testBox.max[0] - testBox.min[0]) / 2;
2899 	  if(currExtent > largestExtent)
2900 	    {
2901 	      largestExtent = currExtent;
2902 	      worstIndex = kk;
2903 	    }
2904 	} /* END FOR kk */
2905       if(worstIndex < 0)
2906 	return -1;
2907       currShell->extent = largestExtent;
2908     } /* END FOR i */
2909   return 0;
2910 }
2911 
2912 
2913 
2914 
2915 
2916 static integer
get_no_of_primitives_for_density(real cutoff,const real * dmat,BasisInfoStruct * basisInfo)2917 get_no_of_primitives_for_density(real cutoff,
2918 				 const real *dmat,
2919 				 BasisInfoStruct* basisInfo)
2920 {
2921 #define MAX_DISTR_IN_TEMP_LIST 888
2922 
2923   integer i, j;
2924   integer symmetryFactor;
2925   integer nBasisFuncs, nn;
2926 
2927   do_output_2(2, "entering function get_no_of_primitives_for_density, cutoff = %22.15f", cutoff);
2928 
2929   nBasisFuncs = basisInfo->noOfBasisFuncs;
2930   nn = 0;
2931   for(i = 0; i < nBasisFuncs; i++)
2932     {
2933       for(j = 0; j < nBasisFuncs; j++)
2934 	{
2935 	  DistributionSpecStruct tempList[MAX_DISTR_IN_TEMP_LIST];
2936 	  integer nPrimitives, k;
2937 	  /* the matrix M is symmetric: include diagonal terms once, */
2938 	  /* and include upper off-diagonal terms multiplied by 2 */
2939 	  if(i == j)
2940               symmetryFactor = 1;
2941 	  else
2942 	    symmetryFactor = 2;
2943 	  if(i > j)
2944 	    continue;
2945           nPrimitives =
2946 	    get_product_simple_primitives(basisInfo, i,
2947 					  basisInfo, j,
2948 					  tempList,
2949 					  MAX_DISTR_IN_TEMP_LIST);
2950 	  do_output_2(3, "get_product_simple_primitives returned %i",
2951 		      nPrimitives);
2952 	  if(nPrimitives <= 0)
2953 	    {
2954 	      do_output_2(0, "error in get_product_simple_primitives");
2955 	      return -1;
2956 	    }
2957 	  for(k = 0; k < nPrimitives; k++)
2958 	    {
2959 	      DistributionSpecStruct* currDistr = &tempList[k];
2960 	      real Mij = dmat[i*nBasisFuncs+j];
2961 	      real newCoeff = currDistr->coeff * Mij * symmetryFactor;
2962 	      if(fabs(newCoeff) > cutoff)
2963 		nn++;
2964 	    }
2965 	}
2966     }
2967   return nn;
2968 }
2969 
2970 
2971 
2972 
2973 static integer
get_density(DistributionSpecStruct ** rhoPtr,integer maxCountShellList,integer * noOfShellsReturn,real cutoffInp,real targetRhoError,integer nbast,const real * dmat,ShellSpecStruct * shellList,BasisFuncStruct * basisFuncList)2974 get_density(DistributionSpecStruct** rhoPtr,
2975 	    integer maxCountShellList,
2976 	    integer* noOfShellsReturn,
2977 	    real cutoffInp,
2978 	    real targetRhoError,
2979 	    integer nbast,
2980 	    const real *dmat,
2981 	    ShellSpecStruct* shellList,
2982 	    BasisFuncStruct* basisFuncList)
2983 {
2984 #define MAX_DISTR_IN_TEMP_LIST 888
2985   real cutoff = cutoffInp;
2986 
2987   /*char s[888]; */
2988   integer i, j, k, kk;
2989   DistributionSpecStruct* workList;
2990   DistributionSpecStruct* rhoSaved;
2991   real absvalue;
2992   real absdiff;
2993   real sqrtValue;
2994   integer sameYesNo, firstIndex, count, withinLimit, resultCount;
2995   real coeffSum;
2996   integer* markList;
2997   integer symmetryFactor;
2998   integer nBasisFuncs, nn, nNeededForRho;
2999   BasisInfoStruct basisInfo;
3000   DistributionSpecStruct* rho;
3001 
3002   do_output_2(2, "entering function get_density, cutoff = %22.15f", cutoff);
3003 
3004   if(get_shells(&basisInfo) != 0)
3005     {
3006       do_output_2(0, "error in get_shells");
3007       return -1;
3008     }
3009   do_output_2(2, "get_shells returned OK, number of shells: %i",
3010 	      basisInfo.noOfShells);
3011   if(compute_extent_for_shells(&basisInfo, targetRhoError) != 0)
3012     {
3013       do_output_2(0, "error in compute_extent_for_shells");
3014       return -1;
3015     }
3016   do_output_2(2, "compute_extent_for_shells returned OK");
3017   if(get_basis_funcs(&basisInfo) != 0)
3018     {
3019       do_output_2(0, "error in get_basis_funcs");
3020       return -1;
3021     }
3022   do_output_2(2, "get_basis_funcs returned OK, number of basis funcs: %i",
3023 	      basisInfo.noOfBasisFuncs);
3024   if(get_simple_primitives_all(&basisInfo) != 0)
3025     {
3026       do_output_2(0, "error in get_simple_primitives_all");
3027       return -1;
3028     }
3029   do_output_2(2, "get_simple_primitives_all returned OK, n = %i",
3030 	      basisInfo.noOfSimplePrimitives);
3031 
3032 
3033   /* find out how much space is needed for rho */
3034   nNeededForRho = get_no_of_primitives_for_density(cutoff,
3035                                                    dmat,
3036                                                    &basisInfo);
3037   if(nNeededForRho <= 0)
3038     {
3039       do_output_2(0, "error in get_no_of_primitives_for_density");
3040       return -1;
3041     }
3042 
3043   /* allocate rho */
3044   rho = dal_malloc_safe(nNeededForRho * sizeof(DistributionSpecStruct));
3045   *rhoPtr = rho;
3046 
3047   nBasisFuncs = basisInfo.noOfBasisFuncs;
3048   nn = 0;
3049   for(i = 0; i < nBasisFuncs; i++)
3050     {
3051       for(j = 0; j < nBasisFuncs; j++)
3052 	{
3053 	  DistributionSpecStruct tempList[MAX_DISTR_IN_TEMP_LIST];
3054 	  integer nPrimitives, k;
3055             /*printf("i = %i, j = %i\n", i, j); */
3056 	  /* the matrix M is symmetric: include diagonal terms once, */
3057 	  /* and include upper off-diagonal terms multiplied by 2 */
3058 	  if(i == j)
3059               symmetryFactor = 1;
3060 	  else
3061 	    symmetryFactor = 2;
3062 	  if(i > j)
3063 	    continue;
3064 	  /*printf("calling get_product_simple_primitives\n"); */
3065           nPrimitives =
3066 	    get_product_simple_primitives(&basisInfo, i,
3067 					  &basisInfo, j,
3068 					  tempList,
3069 					  MAX_DISTR_IN_TEMP_LIST);
3070 	  do_output_2(3, "get_product_simple_primitives returned %i",
3071 		      nPrimitives);
3072 	  if(nPrimitives <= 0)
3073 	    {
3074 	      do_output_2(0, "error in get_product_simple_primitives");
3075 	      return -1;
3076 	    }
3077 	  for(k = 0; k < nPrimitives; k++)
3078 	    {
3079 	      DistributionSpecStruct* currDistr = &tempList[k];
3080 	      real Mij = dmat[i*nBasisFuncs+j];
3081 	      /*printf("symmetryFactor = %i\n", symmetryFactor); */
3082 	      real newCoeff = currDistr->coeff * Mij * symmetryFactor;
3083 	      do_output_2(4, "Mij = %33.22f", Mij);
3084 	      do_output_2(4, "currDistr->coeff = %33.22f", currDistr->coeff);
3085 	      do_output_2(4, "newCoeff = %33.22f", newCoeff);
3086 	      if(fabs(newCoeff) > cutoff)
3087 		{
3088 		  /* add to final list */
3089 		  if(nn > nNeededForRho)
3090 		    {
3091 		      do_output_2(0, "error: (nn > nNeededForRho)");
3092 		      return -1;
3093 		    }
3094 		  memcpy(&rho[nn], currDistr,
3095 			 sizeof(DistributionSpecStruct));
3096 		  rho[nn].coeff = newCoeff;
3097 		  nn++;
3098 		}
3099 	    }
3100 	}
3101     }
3102 
3103   *noOfShellsReturn = basisInfo.noOfShells;
3104 
3105   memcpy(shellList, basisInfo.shellList,
3106 	 basisInfo.noOfShells * sizeof(ShellSpecStruct));
3107 
3108   memcpy(basisFuncList, basisInfo.basisFuncList,
3109 	 basisInfo.noOfShells * sizeof(BasisFuncStruct));
3110 
3111 
3112   do_output_2(2, "loop ended OK; list 'rho' created, nn = %i", nn);
3113 
3114   /* Now all distributions are stored in the list 'rho'. */
3115   /* The number of entries in the list is nn. */
3116   /* It could happen that all entries are not unique. */
3117   /* We want to join distributions that have the same center  */
3118   /* and the same exponent. */
3119   /* To do this, start with sorting the list by nx, ny, nz, exponent. */
3120   workList = dal_malloc_safe(nn * sizeof(DistributionSpecStruct));
3121   rhoSaved = dal_malloc_safe(nn * sizeof(DistributionSpecStruct));
3122   memcpy(rhoSaved, rho, nn * sizeof(DistributionSpecStruct));
3123 
3124   do_output_2(2, "calling do_merge_sort_distrs, nn = %i", nn);
3125   if(do_merge_sort_distrs(nn, rho, workList) != 0)
3126     {
3127       do_output_2(0, "error in do_merge_sort_distrs");
3128       return -1;
3129     }
3130   do_output_2(2, "do_merge_sort_distrs returned OK");
3131 
3132 
3133   do_output_2(2, "checking sort..");
3134   /* check that list is sorted */
3135   for(i = 0; i < (nn-1); i++)
3136     {
3137       if(rho[i].exponent < rho[i+1].exponent)
3138 	{
3139 	  sameYesNo = 1;
3140 	  for(j = 0; j < 3; j++)
3141 	    {
3142 	      if(rho[i].monomialInts[j] != rho[i+1].monomialInts[j])
3143 		sameYesNo = 0;
3144 	    } /* END FOR j */
3145 	  if(sameYesNo == 1)
3146 	    {
3147 	      printf("error: distr list NOT properly sorted\n");
3148 	      return -1;
3149 	    }
3150 	}
3151     } /* END FOR i */
3152   do_output_2(2, "sort checked OK");
3153 
3154 
3155   markList = dal_malloc_safe(nn * sizeof(integer));
3156   for(i = 0; i < nn; i++)
3157     markList[i] = 0;
3158 
3159   /* now go through sorted list, joining distributions where possible */
3160   i = 0;
3161   count = 0;
3162   firstIndex = 0;
3163   while(i < nn)
3164     {
3165         /* check if this entry has the same nx ny nz as current 'firstIndex' */
3166       sameYesNo = 1;
3167       for(j = 0; j < 3; j++)
3168 	{
3169 	  if(rho[i].monomialInts[j] != rho[firstIndex].monomialInts[j])
3170 	    sameYesNo = 0;
3171 	} /* END FOR j */
3172       /* check exponent */
3173       absdiff = fabs(rho[i].exponent - rho[firstIndex].exponent);
3174       if(absdiff > EXPONENT_DIFF_LIMIT)
3175 	sameYesNo = 0;
3176       if(sameYesNo == 0)
3177 	{
3178 	  for(j = firstIndex; j < i; j++)
3179 	    {
3180 	      if(markList[j] == 0)
3181 		{
3182 		  markList[j] = 1;
3183 		  /* join distrs that have centers within  */
3184 		  /* DISTR_CENTER_DIST_LIMIT of this one */
3185 		  coeffSum = rho[j].coeff;
3186 		  for(k = j+1; k < i; k++)
3187 		    {
3188 		      withinLimit = 1;
3189 		      for(kk = 0; kk < 3; kk++)
3190 			{
3191 			  absdiff = fabs(rho[j].centerCoords[kk] -
3192 					 rho[k].centerCoords[kk]);
3193 			  if(absdiff > DISTR_CENTER_DIST_LIMIT)
3194 			    withinLimit = 0;
3195 			} /* END FOR kk */
3196 		      if(withinLimit == 1)
3197 			{
3198 			  coeffSum += rho[k].coeff;
3199 			  markList[k] = 1;
3200 			  /*printf("found, k = %i\n", k); */
3201 			}
3202 		    } /* END FOR k */
3203 		  memcpy(&workList[count],
3204 			 &rho[j],
3205 			 sizeof(DistributionSpecStruct));
3206 		  workList[count].coeff = coeffSum;
3207 		  count++;
3208 		} /* END IF (markList[j] == 0) */
3209 	    } /* END FOR j */
3210 	  /*printf("setting firstIndex = %i\n", i); */
3211 	  firstIndex = i;
3212 	}
3213       else
3214 	{
3215             /*printf("sameYesNo = 1\n"); */
3216 	}
3217       i++;
3218     } /* END WHILE (i < nn) */
3219   /* take care of last part */
3220   for(j = firstIndex; j < nn; j++)
3221     {
3222       if(markList[j] == 0)
3223 	{
3224 	  markList[j] = 1;
3225 	  /* join distrs that have centers within  */
3226 	  /* DISTR_CENTER_DIST_LIMIT of this one */
3227 	  coeffSum = rho[j].coeff;
3228 	  for(k = j+1; k < nn; k++)
3229 	    {
3230 	      withinLimit = 1;
3231 	      for(kk = 0; kk < 3; kk++)
3232 		{
3233 		  absdiff = fabs(rho[j].centerCoords[kk] -
3234 				 rho[k].centerCoords[kk]);
3235 		  if(absdiff > DISTR_CENTER_DIST_LIMIT)
3236 		    withinLimit = 0;
3237 		} /* END FOR kk */
3238 	      if(withinLimit == 1)
3239 		{
3240 		  coeffSum += rho[k].coeff;
3241 		  markList[k] = 1;
3242 		}
3243 	    } /* END FOR k */
3244 	  memcpy(&workList[count], &rho[j], sizeof(DistributionSpecStruct));
3245 	  workList[count].coeff = coeffSum;
3246 	  count++;
3247 	} /* END IF (markList[j] == 0) */
3248     } /* END FOR j */
3249 
3250   for(j = 0; j < nn; j++)
3251     {
3252       if(markList[j] != 1)
3253 	{
3254 	  printf("error: (markList[%i] != 1)\n", j);
3255 	  return -1;
3256 	}
3257     } /* END FOR j */
3258 
3259 
3260   /* now move results back to list 'rho',  */
3261   /* skipping those that have too small coeff */
3262   resultCount = 0;
3263   for(i = 0; i < count; i++)
3264     {
3265       sqrtValue = sqrt(CONSTPI / workList[i].exponent);
3266       absvalue = workList[i].coeff * sqrtValue * sqrtValue * sqrtValue;
3267       if(absvalue < 0) absvalue *= -1;
3268       if(absvalue > cutoff)
3269 	{
3270 	  memcpy(&rho[resultCount],
3271 		 &workList[i],
3272 		 sizeof(DistributionSpecStruct));
3273 	  resultCount++;
3274 	}
3275     } /* END FOR i */
3276   /*memcpy(rho, workList, count * sizeof(DistributionSpecStruct)); */
3277 
3278   do_output_2(2, "nn          = %9i", nn);
3279   do_output_2(2, "count       = %9i", count);
3280   do_output_2(2, "resultCount = %9i", resultCount);
3281 
3282   /*dal_free(list); */
3283   /*dal_free(indexList); */
3284   dal_free(workList);
3285   dal_free(markList);
3286   dal_free(rhoSaved);
3287 
3288   return resultCount;
3289 } /* END read_density_file */
3290 
3291 
3292 
3293 void
do_cartesian_grid(integer nbast,const real * dmat,DftGridReader * res)3294 do_cartesian_grid(integer nbast, const real* dmat, DftGridReader* res)
3295 {
3296   DistributionSpecStruct* rho;
3297   ShellSpecStruct* shellList;
3298   BasisFuncStruct* basisFuncList;
3299   integer noOfShells, nGridPoints, noOfDistributions;
3300   DensitySpecStruct density;
3301 
3302   /* check if new grid must be created */
3303   if((global_gridCount == 0) ||
3304      ((global_gridCount+1) % global_nFreeze) == 0)
3305     {
3306 
3307         /* check dmat */
3308       real maxabs = 0;
3309       integer i;
3310       for(i = 0; i < nbast*nbast; i++)
3311 	{
3312 	  real temp = fabs(dmat[i]);
3313 	  if(temp > maxabs)
3314 	    maxabs = temp;
3315 	}
3316       do_output_2(2, "do_cartesian_grid checking dmat: maxabs = %22.15f",
3317 		  maxabs);
3318 
3319         /* allocate memory */
3320         /*printf("allocating memory..\n"); */
3321       shellList =
3322 	dal_malloc(global_maxNoOfShells *
3323 		   sizeof(ShellSpecStruct));
3324       basisFuncList =
3325 	dal_malloc(nbast * sizeof(BasisFuncStruct));
3326 
3327       noOfDistributions =
3328 	get_density(&rho,
3329 		    global_maxNoOfShells,
3330 		    &noOfShells,
3331 		    global_distrCutoff,
3332 		    global_targetRhoError,
3333 		    nbast,
3334 		    dmat,
3335 		    shellList,
3336 		    basisFuncList);
3337       if(noOfDistributions <= 0)
3338 	{
3339 	  fprintf(stderr, "error in read_density_file\n");
3340 	  fort_print("error in read_density_file");
3341 	  abort();
3342 	  return;
3343 	}
3344 
3345       density.noOfShells = noOfShells;
3346       density.shellList = shellList;
3347       density.nbast = nbast;
3348       density.dmat = dmat;
3349       density.basisFuncList = basisFuncList;
3350       density.noOfDistributions = noOfDistributions;
3351       density.distrList = rho;
3352 
3353       /* get grid */
3354       nGridPoints = compute_grid(&density,
3355 				 global_maxerror,
3356 				 global_boxdist,
3357 				 global_targetRhoError,
3358 				 "DALTON.cQUAD",
3359 				 global_nThreads,
3360 				 global_doTestIntegration);
3361       if(nGridPoints <= 0)
3362 	{
3363 	  fprintf(stderr, "error in compute_grid");
3364 	  fort_print("error in compute_grid");
3365 	  abort();
3366 	  return;
3367 	}
3368 
3369       /* free memory */
3370       free(shellList);
3371       free(rho);
3372       free(basisFuncList);
3373 
3374     } /* END IF create new grid */
3375 
3376   global_gridCount++;
3377   return;
3378 }
3379 
3380 
3381 int output_energy_counter = 0;
3382 FILE* energy_file = NULL;
3383 
3384 void
output_energy_(double * energyPtr)3385 output_energy_(double* energyPtr)
3386 {
3387   double energy = *energyPtr;
3388   char s[888];
3389 
3390   output_energy_counter++;
3391   sprintf(s, "Energy %2i: %20.12f", output_energy_counter, energy);
3392   do_output_2(1, s);
3393   if(energy_file == NULL)
3394     {
3395       energy_file = fopen("energy_file.txt", "wt");
3396     }
3397   fprintf(energy_file, "%s\n", s);
3398   fflush(energy_file);
3399 }
3400 
3401