1 /*  Copyright (C) 2003-2007  CAMP
2  *  Copyright (C) 2007-2008  CAMd
3  *  Copyright (C) 2008-2010  CSC - IT Center for Science Ltd.
4  *  Copyright (C) 2011  Argonne National Laboratory
5  *  Please see the accompanying LICENSE file for further information. */
6 
7 #include <Python.h>
8 #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
9 #define NO_IMPORT_ARRAY
10 #include <numpy/arrayobject.h>
11 #include "extensions.h"
12 #include <math.h>
13 #include <stdlib.h>
14 #ifdef __DARWIN_UNIX03
15 /* Allows for special MaxOS magic */
16 #include <malloc/malloc.h>
17 #endif
18 
19 #ifdef _OPENMP
20 #include <omp.h>
21 #endif
22 
23 #ifdef GPAW_HPM
24 void HPM_Start(char *);
25 void HPM_Stop(char *);
26 void summary_start(void);
27 void summary_stop(void);
28 
ibm_hpm_start(PyObject * self,PyObject * args)29 PyObject* ibm_hpm_start(PyObject *self, PyObject *args)
30 {
31   char* s;
32   if (!PyArg_ParseTuple(args, "s", &s))
33     return NULL;
34   HPM_Start(s);
35   Py_RETURN_NONE;
36 }
37 
ibm_hpm_stop(PyObject * self,PyObject * args)38 PyObject* ibm_hpm_stop(PyObject *self, PyObject *args)
39 {
40   char* s;
41   if (!PyArg_ParseTuple(args, "s", &s))
42     return NULL;
43   HPM_Stop(s);
44   Py_RETURN_NONE;
45 }
46 
ibm_mpi_start(PyObject * self)47 PyObject* ibm_mpi_start(PyObject *self)
48 {
49   summary_start();
50   Py_RETURN_NONE;
51 }
52 
ibm_mpi_stop(PyObject * self)53 PyObject* ibm_mpi_stop(PyObject *self)
54 {
55   summary_stop();
56   Py_RETURN_NONE;
57 }
58 #endif
59 
60 
61 #ifdef CRAYPAT
62 #include <pat_api.h>
63 
craypat_region_begin(PyObject * self,PyObject * args)64 PyObject* craypat_region_begin(PyObject *self, PyObject *args)
65 {
66   int n;
67   char* s;
68   if (!PyArg_ParseTuple(args, "is", &n, &s))
69     return NULL;
70   PAT_region_begin(n, s);
71   Py_RETURN_NONE;
72 }
73 
craypat_region_end(PyObject * self,PyObject * args)74 PyObject* craypat_region_end(PyObject *self, PyObject *args)
75 {
76   int n;
77   if (!PyArg_ParseTuple(args, "i", &n))
78     return NULL;
79   PAT_region_end(n);
80   Py_RETURN_NONE;
81 }
82 #endif
83 
get_num_threads(PyObject * self,PyObject * args)84 PyObject* get_num_threads(PyObject *self, PyObject *args)
85 {
86   int nthreads = 1;
87 #ifdef _OPENMP
88   #pragma omp parallel
89   {
90     nthreads = omp_get_num_threads();
91   }
92 #endif
93   return Py_BuildValue("i", nthreads);
94 }
95 
96 #ifdef PARALLEL
97 #include <mpi.h>
98 
99 struct eval {
100   double val;
101   int rank;
102 };
103 
coll_print(FILE * fp,const char * label,double val,int print_aggregate,MPI_Comm Comm)104 static void coll_print(FILE *fp, const char *label, double val,
105                        int print_aggregate, MPI_Comm Comm){
106   double sum;
107   struct eval in;
108   struct eval out;
109   int rank, numranks;
110   MPI_Comm_size(Comm, &numranks);
111   MPI_Comm_rank(Comm, &rank);
112   in.val=val;
113   in.rank=rank;
114 
115   MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, Comm);
116   if(rank==0) {
117     if(print_aggregate)
118       fprintf(fp,"#%19s %14.3f %10.3f ",label,sum,sum/numranks);
119     else
120       fprintf(fp,"#%19s                %10.3f ",label,sum/numranks);
121   }
122 
123   MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, Comm);
124   if(rank==0){
125     fprintf(fp,"%4d %10.3f ", out.rank, out.val);
126   }
127   MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, Comm);
128   if(rank==0){
129     fprintf(fp,"%4d %10.3f\n",out.rank, out.val);
130   }
131 }
132 
133 // Utilities for performance measurement with PAPI
134 #ifdef GPAW_PAPI
135 #include <papi.h>
136 
137 #define NUM_PAPI_EV 1
138 
139 static long_long papi_start_usec_p;
140 static long_long papi_start_usec_r;
141 
142 // Returns PAPI_dmem_info structure in Python dictionary
143 // Units used by PAPI are kB
papi_mem_info(PyObject * self,PyObject * args)144 PyObject* papi_mem_info(PyObject *self, PyObject *args)
145 {
146   PAPI_dmem_info_t dmem;
147   PyObject* py_dmem;
148 
149   PAPI_get_dmem_info(&dmem);
150 
151   py_dmem = PyDict_New();
152   PyDict_SetItemString(py_dmem, "peak", PyLong_FromLongLong(dmem.peak));
153   PyDict_SetItemString(py_dmem, "size", PyLong_FromLongLong(dmem.size));
154   PyDict_SetItemString(py_dmem, "resident", PyLong_FromLongLong(dmem.resident));
155   PyDict_SetItemString(py_dmem, "high_water_mark",
156                        PyLong_FromLongLong(dmem.high_water_mark));
157   PyDict_SetItemString(py_dmem, "shared", PyLong_FromLongLong(dmem.shared));
158   PyDict_SetItemString(py_dmem, "text", PyLong_FromLongLong(dmem.text));
159   PyDict_SetItemString(py_dmem, "library", PyLong_FromLongLong(dmem.library));
160   PyDict_SetItemString(py_dmem, "heap", PyLong_FromLongLong(dmem.heap));
161   PyDict_SetItemString(py_dmem, "stack", PyLong_FromLongLong(dmem.stack));
162   PyDict_SetItemString(py_dmem, "pagesize", PyLong_FromLongLong(dmem.pagesize));
163   PyDict_SetItemString(py_dmem, "pte", PyLong_FromLongLong(dmem.pte));
164 
165   return py_dmem;
166 }
167 
gpaw_perf_init()168 int gpaw_perf_init()
169 {
170   int events[NUM_PAPI_EV];
171   events[0] = PAPI_FP_OPS;
172   // events[1] = PAPI_L1_DCM;
173   // events[2] = PAPI_L1_DCH;
174   // events[3] = PAPI_TOT_INS;
175   PAPI_start_counters(events, NUM_PAPI_EV);
176   papi_start_usec_r = PAPI_get_real_usec();
177   papi_start_usec_p = PAPI_get_virt_usec();
178 
179   return 0;
180 }
181 
gpaw_perf_finalize()182 void gpaw_perf_finalize()
183 {
184   long long papi_values[NUM_PAPI_EV];
185   double rtime,ptime;
186   double avegflops;
187   double gflop_opers;
188   PAPI_dmem_info_t dmem;
189   int error = 0;
190   double l1hitratio;
191   long_long papi_end_usec_p;
192   long_long papi_end_usec_r;
193 
194   int rank, numranks;
195 
196   MPI_Comm Comm = MPI_COMM_WORLD;
197 
198   //get papi info, first time it intializes PAPI counters
199   papi_end_usec_r = PAPI_get_real_usec();
200   papi_end_usec_p = PAPI_get_virt_usec();
201 
202   MPI_Comm_size(Comm, &numranks);
203   MPI_Comm_rank(Comm, &rank);
204 
205   FILE *fp;
206   if (rank == 0)
207     fp = fopen("gpaw_perf.log", "w");
208   else
209     fp = NULL;
210 
211   if(PAPI_read_counters(papi_values, NUM_PAPI_EV) != PAPI_OK)
212     error++;
213 
214   if(PAPI_get_dmem_info(&dmem) != PAPI_OK)
215     error++;
216 
217   rtime=(double)(papi_end_usec_r - papi_start_usec_r)/1e6;
218   ptime=(double)(papi_end_usec_p - papi_start_usec_p)/1e6;
219   avegflops=(double)papi_values[0]/rtime/1e9;
220   gflop_opers = (double)papi_values[0]/1e9;
221   // l1hitratio=100.0*(double)papi_values[1]/(papi_values[0] + papi_values[1]);
222 
223   if (rank==0 ) {
224     fprintf(fp,"########  GPAW PERFORMANCE REPORT (PAPI)  ########\n");
225     fprintf(fp,"# MPI tasks   %d\n", numranks);
226     fprintf(fp,"#                        aggregated    average    min(rank/val)   max(rank/val) \n");
227   }
228   coll_print(fp, "Real time (s)", rtime, 1, Comm);
229   coll_print(fp, "Process time (s)", ptime, 1, Comm);
230   coll_print(fp, "Flops (GFlop/s)", avegflops, 1, Comm);
231   coll_print(fp, "Flp-opers (10^9)", gflop_opers, 1, Comm);
232   // coll_print(fp, "L1 hit ratio (%)", l1hitratio, 0, Comm);
233   coll_print(fp, "Peak mem size (MB)", (double)dmem.peak/1.0e3, 0, Comm );
234   coll_print(fp, "Peak resident (MB)", (double)dmem.high_water_mark/1.0e3 ,
235              0, Comm);
236   if(rank==0)  {
237     fflush(fp);
238     fclose(fp);
239   }
240 }
241 #elif GPAW_HPM
242 void HPM_Start(char *);
243 
gpaw_perf_init()244 int gpaw_perf_init()
245 {
246   HPM_Start("GPAW");
247   return 0;
248 }
249 
gpaw_perf_finalize()250 void gpaw_perf_finalize()
251 {
252   HPM_Stop("GPAW");
253 }
254 #else  // Use just MPI_Wtime
255 static double t0;
gpaw_perf_init(void)256 int gpaw_perf_init(void)
257 {
258   t0 = MPI_Wtime();
259   return 0;
260 }
261 
gpaw_perf_finalize(void)262 void gpaw_perf_finalize(void)
263 {
264   double rtime;
265   int rank, numranks;
266 
267   MPI_Comm Comm = MPI_COMM_WORLD;
268 
269   MPI_Comm_size(Comm, &numranks);
270   MPI_Comm_rank(Comm, &rank);
271 
272   double t1 = MPI_Wtime();
273   rtime = t1 - t0;
274 
275   FILE *fp;
276   if (rank == 0)
277     fp = fopen("gpaw_perf.log", "w");
278   else
279     fp = NULL;
280 
281   if (rank==0 ) {
282     fprintf(fp,"########  GPAW PERFORMANCE REPORT (MPI_Wtime)  ########\n");
283     fprintf(fp,"# MPI tasks   %d\n", numranks);
284     fprintf(fp,"#                        aggregated    average    min(rank/val)   max(rank/val) \n");
285   }
286   coll_print(fp, "Real time (s)", rtime, 1, Comm);
287   if(rank==0)  {
288     fflush(fp);
289     fclose(fp);
290   }
291 }
292 #endif
293 #endif
294 
295 // returns the distance between two 3d double vectors
distance(double * a,double * b)296 double distance(double *a, double *b)
297 {
298   double sum = 0;
299   double diff;
300   for (int c = 0; c < 3; c++) {
301     diff = a[c] - b[c];
302     sum += diff*diff;
303   }
304   return sqrt(sum);
305 }
306 
307 
308 // Equivalent to:
309 //
310 //     nt_R += f * abs(psit_R)**2
311 //
add_to_density(PyObject * self,PyObject * args)312 PyObject* add_to_density(PyObject *self, PyObject *args)
313 {
314     double f;
315     PyArrayObject* psit_R_obj;
316     PyArrayObject* nt_R_obj;
317     if (!PyArg_ParseTuple(args, "dOO", &f, &psit_R_obj, &nt_R_obj))
318         return NULL;
319     const double* psit_R = PyArray_DATA(psit_R_obj);
320     double* nt_R = PyArray_DATA(nt_R_obj);
321     int n = PyArray_SIZE(nt_R_obj);
322     if (PyArray_ITEMSIZE(psit_R_obj) == 8) {
323         // Real wave functions
324         // psit_R can be a view of a larger array (psit_R = tmp[:, :, :dim2])
325         int stride2 = PyArray_STRIDE(psit_R_obj, 1) / 8;
326         int dim2 = PyArray_DIM(psit_R_obj, 2);
327         int j = 0;
328         for (int i = 0; i < n;) {
329             for (int k = 0; k < dim2; i++, j++, k++)
330                 nt_R[i] += f * psit_R[j] * psit_R[j];
331             j += stride2 - dim2;
332         }
333     }
334     else
335         // Complex wave functions
336         for (int i = 0; i < n; i++)
337             nt_R[i] += f * (psit_R[2 * i] * psit_R[2 * i] +
338                             psit_R[2 * i + 1] * psit_R[2 * i + 1]);
339     Py_RETURN_NONE;
340 }
341 
342 
utilities_gaussian_wave(PyObject * self,PyObject * args)343 PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args)
344 {
345   Py_complex A_obj;
346   PyArrayObject* r_cG_obj;
347   PyArrayObject* r0_c_obj;
348   Py_complex sigma_obj; // imaginary part ignored
349   PyArrayObject* k_c_obj;
350   PyArrayObject* gs_G_obj;
351 
352   if (!PyArg_ParseTuple(args, "DOODOO", &A_obj, &r_cG_obj, &r0_c_obj, &sigma_obj, &k_c_obj, &gs_G_obj))
353     return NULL;
354 
355   int C, G;
356   C = PyArray_DIMS(r_cG_obj)[0];
357   G = PyArray_DIMS(r_cG_obj)[1];
358   for (int i = 2; i < PyArray_NDIM(r_cG_obj); i++)
359         G *= PyArray_DIMS(r_cG_obj)[i];
360 
361   double* r_cG = DOUBLEP(r_cG_obj); // XXX not ideally strided
362   double* r0_c = DOUBLEP(r0_c_obj);
363   double dr2, kr, alpha = -0.5/pow(sigma_obj.real, 2);
364 
365   int gammapoint = 1;
366   double* k_c = DOUBLEP(k_c_obj);
367   for (int c=0; c<C; c++)
368     gammapoint &= (k_c[c]==0);
369 
370   if (PyArray_DESCR(gs_G_obj)->type_num == NPY_DOUBLE)
371     {
372       double* gs_G = DOUBLEP(gs_G_obj);
373 
374       if(gammapoint)
375         for(int g=0; g<G; g++)
376           {
377             dr2 = pow(r_cG[g]-r0_c[0], 2);
378             for(int c=1; c<C; c++)
379               dr2 += pow(r_cG[c*G+g]-r0_c[c], 2);
380             gs_G[g] = A_obj.real*exp(alpha*dr2);
381           }
382       else if(sigma_obj.real>0)
383         for(int g=0; g<G; g++)
384           {
385             kr = k_c[0]*r_cG[g];
386             dr2 = pow(r_cG[g]-r0_c[0], 2);
387             for(int c=1; c<C; c++)
388               {
389                 kr += k_c[c]*r_cG[c*G+g];
390                 dr2 += pow(r_cG[c*G+g]-r0_c[c], 2);
391               }
392             kr = A_obj.real*cos(kr)-A_obj.imag*sin(kr);
393             gs_G[g] = kr*exp(alpha*dr2);
394           }
395       else
396         {
397           return NULL; //TODO sigma<=0 could be exp(-(r-r0)^2/2sigma^2) -> 1 ?
398         }
399     }
400   else
401     {
402       double_complex* gs_G = COMPLEXP(gs_G_obj);
403       double_complex A = A_obj.real+I*A_obj.imag;
404 
405       if(gammapoint)
406         for(int g=0; g<G; g++)
407           {
408             dr2 = pow(r_cG[g]-r0_c[0], 2);
409             for(int c=1; c<C; c++)
410               dr2 += pow(r_cG[c*G+g]-r0_c[c], 2);
411             gs_G[g] = A*exp(alpha*dr2);
412           }
413       else if(sigma_obj.real>0)
414         for(int g=0; g<G; g++)
415           {
416             kr = k_c[0]*r_cG[g];
417             dr2 = pow(r_cG[g]-r0_c[0], 2);
418             for(int c=1; c<C; c++)
419               {
420                 kr += k_c[c]*r_cG[c*G+g];
421                 dr2 += pow(r_cG[c*G+g]-r0_c[c], 2);
422               }
423             double_complex f = A*cexp(I*kr);
424             gs_G[g] = f*exp(alpha*dr2);
425           }
426       else
427         {
428           return NULL; //TODO sigma<=0 could be exp(-(r-r0)^2/2sigma^2) -> 1 ?
429         }
430     }
431 
432   Py_RETURN_NONE;
433 }
434 
435 
pack(PyObject * self,PyObject * args)436 PyObject* pack(PyObject *self, PyObject *args)
437 {
438     PyArrayObject* a_obj;
439     if (!PyArg_ParseTuple(args, "O", &a_obj))
440         return NULL;
441     a_obj = PyArray_GETCONTIGUOUS(a_obj);
442     int n = PyArray_DIMS(a_obj)[0];
443     npy_intp dims[1] = {n * (n + 1) / 2};
444     int typenum = PyArray_DESCR(a_obj)->type_num;
445     PyArrayObject* b_obj = (PyArrayObject*) PyArray_SimpleNew(1, dims,
446                                                               typenum);
447     if (b_obj == NULL)
448       return NULL;
449     if (typenum == NPY_DOUBLE) {
450         double* a = (double*)PyArray_DATA(a_obj);
451         double* b = (double*)PyArray_DATA(b_obj);
452         for (int r = 0; r < n; r++) {
453             *b++ = a[r + n * r];
454             for (int c = r + 1; c < n; c++)
455                 *b++ = a[r + n * c] + a[c + n * r];
456         }
457     } else {
458         double complex* a = (double complex*)PyArray_DATA(a_obj);
459         double complex* b = (double complex*)PyArray_DATA(b_obj);
460         for (int r = 0; r < n; r++) {
461             *b++ = a[r + n * r];
462             for (int c = r + 1; c < n; c++)
463                 *b++ = a[r + n * c] + a[c + n * r];
464         }
465     }
466     Py_DECREF(a_obj);
467     PyObject* value = Py_BuildValue("O", b_obj);
468     Py_DECREF(b_obj);
469     return value;
470 }
471 
472 
unpack(PyObject * self,PyObject * args)473 PyObject* unpack(PyObject *self, PyObject *args)
474 {
475   PyArrayObject* ap;
476   PyArrayObject* a;
477   if (!PyArg_ParseTuple(args, "OO", &ap, &a))
478     return NULL;
479   int n = PyArray_DIMS(a)[0];
480   double* datap = DOUBLEP(ap);
481   double* data = DOUBLEP(a);
482   for (int r = 0; r < n; r++)
483     for (int c = r; c < n; c++)
484       {
485         double d = *datap++;
486         data[c + r * n] = d;
487         data[r + c * n] = d;
488       }
489   Py_RETURN_NONE;
490 }
491 
unpack_complex(PyObject * self,PyObject * args)492 PyObject* unpack_complex(PyObject *self, PyObject *args)
493 {
494   PyArrayObject* ap;
495   PyArrayObject* a;
496   if (!PyArg_ParseTuple(args, "OO", &ap, &a))
497     return NULL;
498   int n = PyArray_DIMS(a)[0];
499   double_complex* datap = COMPLEXP(ap);
500   double_complex* data = COMPLEXP(a);
501   for (int r = 0; r < n; r++)
502     for (int c = r; c < n; c++)
503       {
504         double_complex d = *datap++;
505         data[c + r * n] = d;
506         data[r + c * n] = conj(d);
507       }
508   Py_RETURN_NONE;
509 }
510 
hartree(PyObject * self,PyObject * args)511 PyObject* hartree(PyObject *self, PyObject *args)
512 {
513     int l;
514     PyArrayObject* nrdr_obj;
515     PyArrayObject* r_obj;
516     PyArrayObject* vr_obj;
517     if (!PyArg_ParseTuple(args, "iOOO", &l, &nrdr_obj, &r_obj, &vr_obj))
518         return NULL;
519 
520     const int M = PyArray_DIM(nrdr_obj, 0);
521     const double* nrdr = DOUBLEP(nrdr_obj);
522     const double* r = DOUBLEP(r_obj);
523     double* vr = DOUBLEP(vr_obj);
524 
525     double p = 0.0;
526     double q = 0.0;
527     for (int g = M - 1; g > 0; g--)
528     {
529         double R = r[g];
530         double rl = pow(R, l);
531         double dp = nrdr[g] / rl;
532         double rlp1 = rl * R;
533         double dq = nrdr[g] * rlp1;
534         vr[g] = (p + 0.5 * dp) * rlp1 - (q + 0.5 * dq) / rl;
535         p += dp;
536         q += dq;
537     }
538     vr[0] = 0.0;
539     double f = 4.0 * M_PI / (2 * l + 1);
540     for (int g = 1; g < M; g++)
541     {
542         double R = r[g];
543         vr[g] = f * (vr[g] + q / pow(R, l));
544     }
545     Py_RETURN_NONE;
546 }
547 
localize(PyObject * self,PyObject * args)548 PyObject* localize(PyObject *self, PyObject *args)
549 {
550   PyArrayObject* Z_nnc;
551   PyArrayObject* U_nn;
552   if (!PyArg_ParseTuple(args, "OO", &Z_nnc, &U_nn))
553     return NULL;
554 
555   int n = PyArray_DIMS(U_nn)[0];
556   double complex (*Z)[n][3] = (double complex (*)[n][3])COMPLEXP(Z_nnc);
557   double (*U)[n] = (double (*)[n])DOUBLEP(U_nn);
558 
559   double value = 0.0;
560   for (int a = 0; a < n; a++)
561     {
562       for (int b = a + 1; b < n; b++)
563         {
564           double complex* Zaa = Z[a][a];
565           double complex* Zab = Z[a][b];
566           double complex* Zbb = Z[b][b];
567           double x = 0.0;
568           double y = 0.0;
569           for (int c = 0; c < 3; c++)
570             {
571               x += (0.25 * creal(Zbb[c] * conj(Zbb[c])) +
572                     0.25 * creal(Zaa[c] * conj(Zaa[c])) -
573                     0.5 * creal(Zaa[c] * conj(Zbb[c])) -
574                     creal(Zab[c] * conj(Zab[c])));
575               y += creal((Zaa[c] - Zbb[c]) * conj(Zab[c]));
576             }
577           double t = 0.25 * atan2(y, x);
578           double C = cos(t);
579           double S = sin(t);
580           for (int i = 0; i < a; i++)
581             for (int c = 0; c < 3; c++)
582               {
583                 double complex Ziac = Z[i][a][c];
584                 Z[i][a][c] = C * Ziac + S * Z[i][b][c];
585                 Z[i][b][c] = C * Z[i][b][c] - S * Ziac;
586               }
587           for (int c = 0; c < 3; c++)
588             {
589               double complex Zaac = Zaa[c];
590               double complex Zabc = Zab[c];
591               double complex Zbbc = Zbb[c];
592               Zaa[c] = C * C * Zaac + 2 * C * S * Zabc + S * S * Zbbc;
593               Zbb[c] = C * C * Zbbc - 2 * C * S * Zabc + S * S * Zaac;
594               Zab[c] = S * C * (Zbbc - Zaac) + (C * C - S * S) * Zabc;
595             }
596           for (int i = a + 1; i < b; i++)
597             for (int c = 0; c < 3; c++)
598               {
599                 double complex Zaic = Z[a][i][c];
600                 Z[a][i][c] = C * Zaic + S * Z[i][b][c];
601                 Z[i][b][c] = C * Z[i][b][c] - S * Zaic;
602               }
603           for (int i = b + 1; i < n; i++)
604             for (int c = 0; c < 3; c++)
605               {
606                 double complex Zaic = Z[a][i][c];
607                 Z[a][i][c] = C * Zaic + S * Z[b][i][c];
608                 Z[b][i][c] = C * Z[b][i][c] - S * Zaic;
609               }
610           for (int i = 0; i < n; i++)
611             {
612               double Uia = U[i][a];
613               U[i][a] = C * Uia + S * U[i][b];
614               U[i][b] = C * U[i][b] - S * Uia;
615             }
616         }
617       double complex* Zaa = Z[a][a];
618       for (int c = 0; c < 3; c++)
619         value += creal(Zaa[c] * conj(Zaa[c]));
620     }
621   return Py_BuildValue("d", value);
622 }
623 
624 
spherical_harmonics(PyObject * self,PyObject * args)625 PyObject* spherical_harmonics(PyObject *self, PyObject *args)
626 {
627   int l;
628   PyArrayObject* R_obj_c;
629   PyArrayObject* Y_obj_m;
630   if (!PyArg_ParseTuple(args, "iOO", &l, &R_obj_c, &Y_obj_m))
631     return NULL;
632 
633   double* R_c = DOUBLEP(R_obj_c);
634   double* Y_m = DOUBLEP(Y_obj_m);
635 
636   if (l == 0)
637       Y_m[0] = 0.28209479177387814;
638   else
639     {
640       double x = R_c[0];
641       double y = R_c[1];
642       double z = R_c[2];
643       if (l == 1)
644         {
645           Y_m[0] = 0.48860251190291992 * y;
646           Y_m[1] = 0.48860251190291992 * z;
647           Y_m[2] = 0.48860251190291992 * x;
648         }
649       else
650         {
651           double r2 = x*x+y*y+z*z;
652           if (l == 2)
653             {
654               Y_m[0] = 1.0925484305920792 * x*y;
655               Y_m[1] = 1.0925484305920792 * y*z;
656               Y_m[2] = 0.31539156525252005 * (3*z*z-r2);
657               Y_m[3] = 1.0925484305920792 * x*z;
658               Y_m[4] = 0.54627421529603959 * (x*x-y*y);
659             }
660           else if (l == 3)
661             {
662               Y_m[0] = 0.59004358992664352 * (-y*y*y+3*x*x*y);
663               Y_m[1] = 2.8906114426405538 * x*y*z;
664               Y_m[2] = 0.45704579946446577 * (-y*r2+5*y*z*z);
665               Y_m[3] = 0.3731763325901154 * (5*z*z*z-3*z*r2);
666               Y_m[4] = 0.45704579946446577 * (5*x*z*z-x*r2);
667               Y_m[5] = 1.4453057213202769 * (x*x*z-y*y*z);
668               Y_m[6] = 0.59004358992664352 * (x*x*x-3*x*y*y);
669             }
670           else if (l == 4)
671             {
672               Y_m[0] = 2.5033429417967046 * (x*x*x*y-x*y*y*y);
673               Y_m[1] = 1.7701307697799307 * (-y*y*y*z+3*x*x*y*z);
674               Y_m[2] = 0.94617469575756008 * (-x*y*r2+7*x*y*z*z);
675               Y_m[3] = 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z);
676               Y_m[4] = 0.10578554691520431 * (-30*z*z*r2+3*r2*r2+35*z*z*z*z);
677               Y_m[5] = 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2);
678               Y_m[6] = 0.47308734787878004 * (-x*x*r2+7*x*x*z*z+y*y*r2-7*y*y*z*z);
679               Y_m[7] = 1.7701307697799307 * (x*x*x*z-3*x*y*y*z);
680               Y_m[8] = 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y);
681             }
682           else if (l == 5)
683             {
684               Y_m[0] = 0.65638205684017015 * (y*y*y*y*y+5*x*x*x*x*y-10*x*x*y*y*y);
685               Y_m[1] = 8.3026492595241645 * (x*x*x*y*z-x*y*y*y*z);
686               Y_m[2] = 0.48923829943525038 * (y*y*y*r2-9*y*y*y*z*z-3*x*x*y*r2+27*x*x*y*z*z);
687               Y_m[3] = 4.7935367849733241 * (3*x*y*z*z*z-x*y*z*r2);
688               Y_m[4] = 0.45294665119569694 * (-14*y*z*z*r2+y*r2*r2+21*y*z*z*z*z);
689               Y_m[5] = 0.1169503224534236 * (63*z*z*z*z*z+15*z*r2*r2-70*z*z*z*r2);
690               Y_m[6] = 0.45294665119569694 * (x*r2*r2-14*x*z*z*r2+21*x*z*z*z*z);
691               Y_m[7] = 2.3967683924866621 * (-3*y*y*z*z*z+y*y*z*r2+3*x*x*z*z*z-x*x*z*r2);
692               Y_m[8] = 0.48923829943525038 * (9*x*x*x*z*z-27*x*y*y*z*z-x*x*x*r2+3*x*y*y*r2);
693               Y_m[9] = 2.0756623148810411 * (y*y*y*y*z-6*x*x*y*y*z+x*x*x*x*z);
694               Y_m[10] = 0.65638205684017015 * (-10*x*x*x*y*y+5*x*y*y*y*y+x*x*x*x*x);
695             }
696           else if (l == 6)
697             {
698               Y_m[0] = 1.3663682103838286 * (-10*x*x*x*y*y*y+3*x*x*x*x*x*y+3*x*y*y*y*y*y);
699               Y_m[1] = 2.3666191622317521 * (y*y*y*y*y*z-10*x*x*y*y*y*z+5*x*x*x*x*y*z);
700               Y_m[2] = 2.0182596029148967 * (-x*x*x*y*r2+x*y*y*y*r2-11*x*y*y*y*z*z+11*x*x*x*y*z*z);
701               Y_m[3] = 0.92120525951492349 * (-11*y*y*y*z*z*z-9*x*x*y*z*r2+33*x*x*y*z*z*z+3*y*y*y*z*r2);
702               Y_m[4] =0.92120525951492349 * (x*y*r2*r2+33*x*y*z*z*z*z-18*x*y*z*z*r2);
703               Y_m[5] = 0.58262136251873142 * (5*y*z*r2*r2+33*y*z*z*z*z*z-30*y*z*z*z*r2);
704               Y_m[6] = 0.063569202267628425 * (231*z*z*z*z*z*z-5*r2*r2*r2+105*z*z*r2*r2-315*z*z*z*z*r2);
705               Y_m[7] = 0.58262136251873142 * (-30*x*z*z*z*r2+33*x*z*z*z*z*z+5*x*z*r2*r2);
706               Y_m[8] = 0.46060262975746175 * (33*x*x*z*z*z*z+x*x*r2*r2-y*y*r2*r2-18*x*x*z*z*r2+18*y*y*z*z*r2-33*y*y*z*z*z*z);
707               Y_m[9] = 0.92120525951492349 * (-3*x*x*x*z*r2-33*x*y*y*z*z*z+9*x*y*y*z*r2+11*x*x*x*z*z*z);
708               Y_m[10] = 0.50456490072872417 * (11*y*y*y*y*z*z-66*x*x*y*y*z*z-x*x*x*x*r2+6*x*x*y*y*r2+11*x*x*x*x*z*z-y*y*y*y*r2);
709               Y_m[11] = 2.3666191622317521 * (5*x*y*y*y*y*z+x*x*x*x*x*z-10*x*x*x*y*y*z);
710               Y_m[12] = 0.6831841051919143 * (x*x*x*x*x*x+15*x*x*y*y*y*y-15*x*x*x*x*y*y-y*y*y*y*y*y);
711             }
712         }
713     }
714   Py_RETURN_NONE;
715 }
716