1 /*
2   This file is part of MADNESS.
3 
4   Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20   For more information please contact:
21 
22   Robert J. Harrison
23   Oak Ridge National Laboratory
24   One Bethel Valley Road
25   P.O. Box 2008, MS-6367
26 
27   email: harrisonrj@ornl.gov
28   tel:   865-241-3937
29   fax:   865-572-0680
30 */
31 
32 #ifndef MADNESS_MRA_CONVOLUTION1D_H__INCLUDED
33 #define MADNESS_MRA_CONVOLUTION1D_H__INCLUDED
34 
35 #include <madness/world/vector.h>
36 #include <madness/constants.h>
37 #include <limits.h>
38 #include <madness/tensor/tensor.h>
39 #include <madness/mra/simplecache.h>
40 #include <madness/mra/adquad.h>
41 #include <madness/mra/twoscale.h>
42 #include <madness/tensor/aligned.h>
43 #include <madness/tensor/tensor_lapack.h>
44 #include <algorithm>
45 
46 /// \file mra/convolution1d.h
47 /// \brief Compuates most matrix elements over 1D operators (including Gaussians)
48 
49 /// \ingroup function
50 
51 namespace madness {
52 
53     void aligned_add(long n, double* MADNESS_RESTRICT a, const double* MADNESS_RESTRICT b);
54     void aligned_sub(long n, double* MADNESS_RESTRICT a, const double* MADNESS_RESTRICT b);
55     void aligned_add(long n, double_complex* MADNESS_RESTRICT a, const double_complex* MADNESS_RESTRICT b);
56     void aligned_sub(long n, double_complex* MADNESS_RESTRICT a, const double_complex* MADNESS_RESTRICT b);
57 
58     template <typename T>
copy_2d_patch(T * MADNESS_RESTRICT out,long ldout,const T * MADNESS_RESTRICT in,long ldin,long n,long m)59     static void copy_2d_patch(T* MADNESS_RESTRICT out, long ldout, const T* MADNESS_RESTRICT in, long ldin, long n, long m) {
60         for (long i=0; i<n; ++i, out+=ldout, in+=ldin) {
61             for (long j=0; j<m; ++j) {
62                 out[j] = in[j];
63             }
64         }
65     }
66 
67     /// a(n,m) --> b(m,n) ... optimized for smallish matrices
68     template <typename T>
fast_transpose(long n,long m,const T * a,T * MADNESS_RESTRICT b)69     inline void fast_transpose(long n, long m, const T* a, T* MADNESS_RESTRICT b) {
70         // n will always be k or 2k (k=wavelet order) and m will be anywhere
71         // from 2^(NDIM-1) to (2k)^(NDIM-1).
72 
73 //                  for (long i=0; i<n; ++i)
74 //                      for (long j=0; j<m; ++j)
75 //                          b[j*n+i] = a[i*m+j];
76 //                  return;
77 
78         if (n==1 || m==1) {
79             long nm=n*m;
80             for (long i=0; i<nm; ++i) b[i] = a[i];
81             return;
82         }
83 
84         long n4 = (n>>2)<<2;
85         long m4 = m<<2;
86         const T* a0 = a;
87         for (long i=0; i<n4; i+=4, a0+=m4) {
88             const T* a1 = a0+m;
89             const T* a2 = a1+m;
90             const T* a3 = a2+m;
91             T* MADNESS_RESTRICT bi = b+i;
92             for (long j=0; j<m; ++j, bi+=n) {
93                 T tmp0 = a0[j];
94                 T tmp1 = a1[j];
95                 T tmp2 = a2[j];
96                 T tmp3 = a3[j];
97 
98                 bi[0] = tmp0;
99                 bi[1] = tmp1;
100                 bi[2] = tmp2;
101                 bi[3] = tmp3;
102             }
103         }
104 
105         for (long i=n4; i<n; ++i)
106             for (long j=0; j<m; ++j)
107                 b[j*n+i] = a[i*m+j];
108 
109     }
110 
111     // /// a(i,j) --> b(i,j) for i=0..n-1 and j=0..r-1 noting dimensions are a(n,m) and b(n,r).
112 
113     // /// returns b
114     // template <typename T>
115     // inline T* shrink(long n, long m, long r, const T* a, T* MADNESS_RESTRICT b) {
116     //     T* result = b;
117     //     if (r == 0) {
118     //         ;
119     //     }
120     //     else if (r == 1) {
121     //         for (long i=0; i<n; ++i) {
122     //             b[i] = a[i];
123     //         }
124     //     }
125     //     else if (r == 2) {
126     //         for (long i=0; i<n; ++i, a+=m, b+=2) {
127     //             b[0] = a[0];
128     //             b[1] = a[1];
129     //         }
130     //     }
131     //     else if (r == 3) {
132     //         for (long i=0; i<n; ++i, a+=m, b+=3) {
133     //             b[0] = a[0];
134     //             b[1] = a[1];
135     //             b[2] = a[2];
136     //         }
137     //     }
138     //     else if (r == 4) {
139     //         for (long i=0; i<n; ++i, a+=m, b+=4) {
140     //             b[0] = a[0];
141     //             b[1] = a[1];
142     //             b[2] = a[2];
143     //             b[3] = a[3];
144     //         }
145     //     }
146     //     else {
147     //         for (long i=0; i<n; ++i, a+=m, b+=r) {
148     //             for (long j=0; j<r; j++) {
149     //                 b[j] = a[j];
150     //             }
151     //         }
152     //     }
153     //     return result;
154     // }
155 
156     /// actual data for 1 dimension and for 1 term and for 1 displacement for a convolution operator
157     /// here we keep the transformation matrices
158 
159     /// !!! Note that if Rnormf is zero then ***ALL*** of the tensors are empty
160     template <typename Q>
161     struct ConvolutionData1D {
162 
163 
164         Tensor<Q> R, T;                 ///< if NS: R=ns, T=T part of ns; if modified NS: T=\uparrow r^(n-1)
165         Tensor<Q> RU, RVT, TU, TVT;     ///< SVD approximations to R and T
166         Tensor<typename Tensor<Q>::scalar_type> Rs, Ts;     ///< hold relative errors, NOT the singular values..
167 
168         // norms for NS form
169         double Rnorm, Tnorm, Rnormf, Tnormf, NSnormf;
170 
171         // norms for modified NS form
172         double N_up, N_diff, N_F;               ///< the norms according to Beylkin 2008, Eq. (21) ff
173 
174 
175         /// ctor for NS form
176         /// make the operator matrices r^n and \uparrow r^(n-1)
177         /// @param[in]  R   operator matrix of the requested level;     NS: unfilter(r^(n+1)); modified NS: r^n
178         /// @param[in]  T   upsampled operator matrix from level n-1;   NS: r^n; modified NS: filter( r^(n-1) )
ConvolutionData1DConvolutionData1D179         ConvolutionData1D(const Tensor<Q>& R, const Tensor<Q>& T) : R(R), T(T) {
180             Rnormf = R.normf();
181             // Making the approximations is expensive ... only do it for
182             // significant components
183             if (Rnormf > 1e-20) {
184                 Tnormf = T.normf();
185                 make_approx(T, TU, Ts, TVT, Tnorm);
186                 make_approx(R, RU, Rs, RVT, Rnorm);
187                 int k = T.dim(0);
188 
189                 Tensor<Q> NS = copy(R);
190                 for (int i=0; i<k; ++i)
191                     for (int j=0; j<k; ++j)
192                         NS(i,j) = 0.0;
193                 NSnormf = NS.normf();
194 
195             }
196             else {
197                 Rnorm = Tnorm = Rnormf = Tnormf = NSnormf = 0.0;
198                 N_F = N_up = N_diff = 0.0;
199             }
200         }
201 
202         /// ctor for modified NS form
203         /// make the operator matrices r^n and \uparrow r^(n-1)
204         /// @param[in]  R   operator matrix of the requested level;     NS: unfilter(r^(n+1)); modified NS: r^n
205         /// @param[in]  T   upsampled operator matrix from level n-1;   NS: r^n; modified NS: filter( r^(n-1) )
206         /// @param[in]  modified    use (un) modified NS form
ConvolutionData1DConvolutionData1D207         ConvolutionData1D(const Tensor<Q>& R, const Tensor<Q>& T, const bool modified) : R(R), T(T) {
208 
209             // note that R can be small, but T still be large
210 
211             Rnormf = R.normf();
212             Tnormf = T.normf();
213             // Making the approximations is expensive ... only do it for
214             // significant components
215             if (Rnormf > 1e-20) make_approx(R, RU, Rs, RVT, Rnorm);
216             if (Tnormf > 1e-20) make_approx(T, TU, Ts, TVT, Tnorm);
217 
218             // norms for modified NS form: follow Beylkin, 2008, Eq. (21) ff
219             N_F=Rnormf;
220             N_up=Tnormf;
221             N_diff=(R-T).normf();
222         }
223 
224 
225 
226         /// approximate the operator matrices using SVD, and abuse Rs to hold the error instead of
227         /// the singular values (seriously, who named this??)
make_approxConvolutionData1D228         void make_approx(const Tensor<Q>& R,
229                          Tensor<Q>& RU, Tensor<typename Tensor<Q>::scalar_type>& Rs, Tensor<Q>& RVT, double& norm) {
230             int n = R.dim(0);
231             svd(R, RU, Rs, RVT);
232             for (int i=0; i<n; ++i) {
233                 for (int j=0; j<n; ++j) {
234                     RVT(i,j) *= Rs[i];
235                 }
236             }
237             for (int i=n-1; i>1; --i) { // Form cumulative sum of norms
238                 Rs[i-1] += Rs[i];
239             }
240 
241             norm = Rs[0];
242             if (Rs[0]>0.0) { // Turn into relative errors
243                 double rnorm = 1.0/norm;
244                 for (int i=0; i<n; ++i) {
245                     Rs[i] *= rnorm;
246                 }
247             }
248         }
249     };
250 
251     /// Provides the common functionality/interface of all 1D convolutions
252 
253     /// interface for 1 term and for 1 dimension;
254     /// the actual data are kept in ConvolutionData1D
255     /// Derived classes must implement rnlp, issmall, natural_level
256     template <typename Q>
257     class Convolution1D {
258     public:
259         typedef Q opT;  ///< The apply function uses this to infer resultT=opT*inputT
260         int k;          ///< Wavelet order
261         int npt;        ///< Number of quadrature points (is this used?)
262         int maxR;       ///< Number of lattice translations for sum
263         Tensor<double> quad_x;
264         Tensor<double> quad_w;
265         Tensor<double> c;
266         Tensor<double> hgT, hg;
267         Tensor<double> hgT2k;
268         double arg;
269 
270         mutable SimpleCache<Tensor<Q>, 1> rnlp_cache;
271         mutable SimpleCache<Tensor<Q>, 1> rnlij_cache;
272         mutable SimpleCache<ConvolutionData1D<Q>, 1> ns_cache;
273         mutable SimpleCache<ConvolutionData1D<Q>, 2> mod_ns_cache;
274 
~Convolution1D()275         virtual ~Convolution1D() {};
276 
277         Convolution1D(int k, int npt, int maxR, double arg = 0.0)
k(k)278                 : k(k)
279                 , npt(npt)
280                 , maxR(maxR)
281                 , quad_x(npt)
282                 , quad_w(npt)
283                 , arg(arg)
284         {
285             auto success = autoc(k,&c);
286             MADNESS_ASSERT(success);
287 
288             gauss_legendre(npt,0.0,1.0,quad_x.ptr(),quad_w.ptr());
289             success = two_scale_hg(k,&hg);
290             MADNESS_ASSERT(success);
291             hgT = transpose(hg);
292             success = two_scale_hg(2*k,&hgT2k);
293             MADNESS_ASSERT(success);
294             hgT2k = transpose(hgT2k);
295 
296             // Cannot construct the coefficients here since the
297             // derived class is not yet constructed so cannot call
298             // (even indirectly) a virtual method
299         }
300 
301         /// Compute the projection of the operator onto the double order polynomials
302         virtual Tensor<Q> rnlp(Level n, Translation lx) const = 0;
303 
304         /// Returns true if the block of rnlp is expected to be small
305         virtual bool issmall(Level n, Translation lx) const = 0;
306 
307         /// Returns true if the block of rnlp is expected to be small including periodicity
get_issmall(Level n,Translation lx)308         bool get_issmall(Level n, Translation lx) const {
309             if (maxR == 0) {
310                 return issmall(n, lx);
311             }
312             else {
313                 Translation twon = Translation(1)<<n;
314                 for (int R=-maxR; R<=maxR; ++R) {
315                     if (!issmall(n, R*twon+lx)) return false;
316                 }
317                 return true;
318             }
319         }
320 
321         /// Returns the level for projection
322         //virtual Level natural_level() const {
323         //    return 13;
324         //}
natural_level()325         virtual Level natural_level() const {return 13;}
326 
327         /// Computes the transition matrix elements for the convolution for n,l
328 
329         /// Returns the tensor
330         /// \code
331         ///   r(i,j) = int(K(x-y) phi[n0](x) phi[nl](y), x=0..1, y=0..1)
332         /// \endcode
333         /// This is computed from the matrix elements over the correlation
334         /// function which in turn are computed from the matrix elements
335         /// over the double order legendre polynomials.
336         const Tensor<Q>& rnlij(Level n, Translation lx, bool do_transpose=false) const {
337             const Tensor<Q>* p=rnlij_cache.getptr(n,lx);
338             if (p) return *p;
339 
340             // PROFILE_MEMBER_FUNC(Convolution1D); // Too fine grain for routine profiling
341 
342             long twok = 2*k;
343             Tensor<Q> R(2*twok);
344             R(Slice(0,twok-1)) = get_rnlp(n,lx-1);
345             R(Slice(twok,2*twok-1)) = get_rnlp(n,lx);
346 
347             R.scale(pow(0.5,0.5*n));
348             R = inner(c,R);
349             if (do_transpose) R = transpose(R);
350             rnlij_cache.set(n,lx,R);
351             return *rnlij_cache.getptr(n,lx);
352         };
353 
354 
355         /// Returns a pointer to the cached modified nonstandard form of the operator
356 
357         /// @param[in]  op_key  holds the scale and the source and target translations
358         /// @return     a pointer to the cached modified nonstandard form of the operator
mod_nonstandard(const Key<2> & op_key)359         const ConvolutionData1D<Q>* mod_nonstandard(const Key<2>& op_key) const {
360 
361             const Level& n=op_key.level();
362             const Translation& sx=op_key.translation()[0];      // source translation
363             const Translation& tx=op_key.translation()[1];      // target translation
364             const Translation  lx=tx-sx;                        // displacement
365             const Translation  s_off=sx%2;
366             const Translation  t_off=tx%2;
367 
368             // we cache translation and source offset
369             const Key<2> cache_key(n, Vector<Translation,2>{lx, s_off} );
370             const ConvolutionData1D<Q>* p = mod_ns_cache.getptr(cache_key);
371             if (p) return p;
372 
373             // for paranoid me
374             MADNESS_ASSERT(sx>=0 and tx>=0);
375 
376 
377             Tensor<Q> R, T, Rm;
378 //            if (!get_issmall(n, lx)) {
379 //                print("no issmall", lx, source, n);
380 
381                 const Translation lx_half = tx/2 - sx/2;
382                 const Slice s0(0,k-1), s1(k,2*k-1);
383 //                print("sx, tx",lx,lx_half,sx, tx,"off",s_off,t_off);
384 
385                 // this is the operator matrix in its actual level
386                 R = rnlij(n,lx);
387 
388                 // this is the upsampled operator matrix
389                 Rm = Tensor<Q>(2*k,2*k);
390                 if (n>0) Rm(s0,s0)=rnlij(n-1,lx_half);
391                 {
392                     // PROFILE_BLOCK(Convolution1D_nstran); // Too fine grain for routine profiling
393                     Rm = transform(Rm,hg);
394                 }
395 
396                 {
397                     // PROFILE_BLOCK(Convolution1D_nscopy); // Too fine grain for routine profiling
398                     T=Tensor<Q>(k,k);
399                     if (t_off==0 and s_off==0) T=copy(Rm(s0,s0));
400                     if (t_off==0 and s_off==1) T=copy(Rm(s0,s1));
401                     if (t_off==1 and s_off==0) T=copy(Rm(s1,s0));
402                     if (t_off==1 and s_off==1) T=copy(Rm(s1,s1));
403 //                    if (t_off==0 and s_off==0) T=copy(Rm(s0,s0));
404 //                    if (t_off==1 and s_off==0) T=copy(Rm(s0,s1));
405 //                    if (t_off==0 and s_off==1) T=copy(Rm(s1,s0));
406 //                    if (t_off==1 and s_off==1) T=copy(Rm(s1,s1));
407                 }
408 
409                 {
410                     // PROFILE_BLOCK(Convolution1D_trans); // Too fine grain for routine profiling
411 
412                     Tensor<Q> RT(k,k), TT(k,k);
413                     fast_transpose(k,k,R.ptr(), RT.ptr());
414                     fast_transpose(k,k,T.ptr(), TT.ptr());
415                     R = RT;
416                     T = TT;
417                 }
418 
419 //            }
420 
421             mod_ns_cache.set(cache_key,ConvolutionData1D<Q>(R,T,true));
422             return mod_ns_cache.getptr(cache_key);
423         }
424 
425         /// Returns a pointer to the cached nonstandard form of the operator
nonstandard(Level n,Translation lx)426         const ConvolutionData1D<Q>* nonstandard(Level n, Translation lx) const {
427             const ConvolutionData1D<Q>* p = ns_cache.getptr(n,lx);
428             if (p) return p;
429 
430             // PROFILE_MEMBER_FUNC(Convolution1D); // Too fine grain for routine profiling
431 
432             Tensor<Q> R, T;
433             if (!get_issmall(n, lx)) {
434                 Translation lx2 = lx*2;
435 #if 0 // UNUSED VARIABLES
436                 Slice s0(0,k-1), s1(k,2*k-1);
437 #endif
438                 const Tensor<Q> r0 = rnlij(n+1,lx2);
439                 const Tensor<Q> rp = rnlij(n+1,lx2+1);
440                 const Tensor<Q> rm = rnlij(n+1,lx2-1);
441 
442                 R = Tensor<Q>(2*k,2*k);
443 
444 //                 R(s0,s0) = r0;
445 //                 R(s1,s1) = r0;
446 //                 R(s1,s0) = rp;
447 //                 R(s0,s1) = rm;
448 
449                 {
450                     // PROFILE_BLOCK(Convolution1D_nscopy); // Too fine grain for routine profiling
451                     copy_2d_patch(R.ptr(),           2*k, r0.ptr(), k, k, k);
452                     copy_2d_patch(R.ptr()+2*k*k + k, 2*k, r0.ptr(), k, k, k);
453                     copy_2d_patch(R.ptr()+2*k*k,     2*k, rp.ptr(), k, k, k);
454                     copy_2d_patch(R.ptr()       + k, 2*k, rm.ptr(), k, k, k);
455                 }
456 
457                 //print("R ", n, lx, R.normf(), r0.normf(), rp.normf(), rm.normf());
458 
459 
460                 {
461                     // PROFILE_BLOCK(Convolution1D_nstran); // Too fine grain for routine profiling
462                     R = transform(R,hgT);
463                 }
464 
465                 //print("RX", n, lx, R.normf(), r0.normf(), rp.normf(), rm.normf());
466 
467                 {
468                     // PROFILE_BLOCK(Convolution1D_trans); // Too fine grain for routine profiling
469 
470                     Tensor<Q> RT(2*k,2*k);
471                     fast_transpose(2*k, 2*k, R.ptr(), RT.ptr());
472                     R = RT;
473 
474                     //print("RT", n, lx, R.normf(), r0.normf(), rp.normf(), rm.normf());
475 
476                     //T = copy(R(s0,s0));
477                     T = Tensor<Q>(k,k);
478                     copy_2d_patch(T.ptr(), k, R.ptr(), 2*k, k, k);
479                 }
480 
481                 //print("NS", n, lx, R.normf(), T.normf());
482             }
483 
484             ns_cache.set(n,lx,ConvolutionData1D<Q>(R,T));
485 
486             return ns_cache.getptr(n,lx);
487         };
488 
phase(double R)489         Q phase(double R) const {
490         	return 1.0;
491         }
492 
phase(double_complex R)493         Q phase(double_complex R) const {
494         	return exp(double_complex(0.0,arg)*R);
495         }
496 
497 
get_rnlp(Level n,Translation lx)498         const Tensor<Q>& get_rnlp(Level n, Translation lx) const {
499             const Tensor<Q>* p=rnlp_cache.getptr(n,lx);
500             if (p) return *p;
501 
502             // PROFILE_MEMBER_FUNC(Convolution1D); // Too fine grain for routine profiling
503 
504             long twok = 2*k;
505             Tensor<Q> r;
506 
507             if (get_issmall(n, lx)) {
508                 r = Tensor<Q>(twok);
509             }
510             else if (n < natural_level()) {
511                 Tensor<Q>  R(2*twok);
512                 R(Slice(0,twok-1)) = get_rnlp(n+1,2*lx);
513                 R(Slice(twok,2*twok-1)) = get_rnlp(n+1,2*lx+1);
514 
515                 R = transform(R, hgT2k);
516                 r = copy(R(Slice(0,twok-1)));
517             }
518             else {
519                 // PROFILE_BLOCK(Convolution1Drnlp); // Too fine grain for routine profiling
520 
521                 if (maxR > 0) {
522                     Translation twon = Translation(1)<<n;
523                     r = Tensor<Q>(2*k);
524                     for (int R=-maxR; R<=maxR; ++R) {
525                         r.gaxpy(1.0, rnlp(n,R*twon+lx), phase(Q(R)));
526                     }
527                 }
528                 else {
529                     r = rnlp(n, lx);
530                 }
531             }
532 
533             rnlp_cache.set(n, lx, r);
534             //print("   SET rnlp", n, lx, r);
535             return *rnlp_cache.getptr(n,lx);
536         }
537     };
538 
539     /// Array of 1D convolutions (one / dimension)
540 
541     /// data for 1 term and all dimensions
542     template <typename Q, int NDIM>
543     class ConvolutionND {
544         std::array<std::shared_ptr<Convolution1D<Q> >, NDIM> ops;
545         Q fac;
546 
547     public:
ConvolutionND()548         ConvolutionND() : fac(1.0) {}
549 
ConvolutionND(const ConvolutionND & other)550         ConvolutionND(const ConvolutionND& other) : fac(other.fac)
551         {
552           ops = other.ops;
553         }
554 
fac(fac)555         ConvolutionND(std::shared_ptr<Convolution1D<Q> > op, Q fac=1.0) : fac(fac)
556         {
557             std::fill(ops.begin(), ops.end(), op);
558         }
559 
setop(int dim,const std::shared_ptr<Convolution1D<Q>> & op)560         void setop(int dim, const std::shared_ptr<Convolution1D<Q> >& op)  {
561             ops[dim] = op;
562         }
563 
getop(int dim)564         std::shared_ptr<Convolution1D<Q> > getop(int dim) const  {
565             return ops[dim];
566         }
567 
setfac(Q value)568         void setfac(Q value) {
569             fac = value;
570         }
571 
getfac()572         Q getfac() const {
573             return fac;
574         }
575     };
576 
577     // To test generic convolution by comparing with GaussianConvolution1D
578     template <typename Q>
579     class GaussianGenericFunctor {
580     private:
581         Q coeff;
582         double exponent;
583         int m;
584         Level natlev;
585 
586     public:
587         // coeff * exp(-exponent*x^2) * x^m
588         GaussianGenericFunctor(Q coeff, double exponent, int m=0)
coeff(coeff)589             : coeff(coeff), exponent(exponent), m(m),
590               natlev(Level(0.5*log(exponent)/log(2.0)+1)) {}
591 
operator()592         Q operator()(double x) const {
593             Q ee = coeff*exp(-exponent*x*x);
594             for (int mm=0; mm<m; ++mm) ee *= x;
595             return ee;
596         }
natural_level()597         Level natural_level() const {return natlev;}
598     };
599 
600 
601     /// Generic 1D convolution using brute force (i.e., slow) adaptive quadrature for rnlp
602 
603     /// Calls op(x) with x in *simulation coordinates* to evaluate the function.
604     template <typename Q, typename opT>
605     class GenericConvolution1D : public Convolution1D<Q> {
606     private:
607         opT op;
608         long maxl;    ///< At natural level is l beyond which operator is zero
609     public:
610 
GenericConvolution1D()611         GenericConvolution1D() {}
612 
613         GenericConvolution1D(int k, const opT& op, int maxR, double arg = 0.0)
614             : Convolution1D<Q>(k, 20, maxR, arg), op(op), maxl(LONG_MAX-1) {
615             // PROFILE_MEMBER_FUNC(GenericConvolution1D); // Too fine grain for routine profiling
616 
617             // For efficiency carefully compute outwards at the "natural" level
618             // until several successive boxes are determined to be zero.  This
619             // then defines the future range of the operator and also serves
620             // to precompute the values used in the rnlp cache.
621 
622             Level natl = natural_level();
623             int nzero = 0;
624             for (Translation lx=0; lx<(1L<<natl); ++lx) {
625                 const Tensor<Q>& rp = this->get_rnlp(natl, lx);
626                 const Tensor<Q>& rm = this->get_rnlp(natl,-lx);
627                 if (rp.normf()<1e-12 && rm.normf()<1e-12) ++nzero;
628                 if (nzero == 3) {
629                     maxl = lx-2;
630                     break;
631                 }
632             }
633         }
634 
natural_level()635         virtual Level natural_level() const {return op.natural_level();}
636 
637         struct Shmoo {
638             typedef Tensor<Q> returnT;
639             Level n;
640             Translation lx;
641             const GenericConvolution1D<Q,opT>& q;
642 
ShmooShmoo643             Shmoo(Level n, Translation lx, const GenericConvolution1D<Q,opT>* q)
644                     : n(n), lx(lx), q(*q) {}
645 
operatorShmoo646             returnT operator()(double x) const {
647                 int twok = q.k*2;
648                 double fac = std::pow(0.5,n);
649                 double phix[twok];
650                 legendre_scaling_functions(x-lx,twok,phix);
651                 Q f = q.op(fac*x)*sqrt(fac);
652                 returnT v(twok);
653                 for (long p=0; p<twok; ++p) v(p) += f*phix[p];
654                 return v;
655             }
656         };
657 
rnlp(Level n,Translation lx)658         Tensor<Q> rnlp(Level n, Translation lx) const {
659             return adq1(lx, lx+1, Shmoo(n, lx, this), 1e-12,
660                         this->npt, this->quad_x.ptr(), this->quad_w.ptr(), 0);
661         }
662 
issmall(Level n,Translation lx)663         bool issmall(Level n, Translation lx) const {
664             if (lx < 0) lx = 1 - lx;
665             // Always compute contributions to nearest neighbor coupling
666             // ... we are two levels below so 0,1 --> 0,1,2,3 --> 0,...,7
667             if (lx <= 7) return false;
668 
669             n = natural_level()-n;
670             if (n >= 0) lx = lx << n;
671             else lx = lx >> n;
672 
673             return lx >= maxl;
674         }
675     };
676 
677 
678     /// 1D convolution with (derivative) Gaussian; coeff and expnt given in *simulation* coordinates [0,1]
679 
680     /// Note that the derivative is computed in *simulation* coordinates so
681     /// you must be careful to scale the coefficients correctly.
682     template <typename Q>
683     class GaussianConvolution1D : public Convolution1D<Q> {
684         // Returns range of Gaussian for periodic lattice sum in simulation coords
maxR(bool periodic,double expnt)685         static int maxR(bool periodic, double expnt) {
686             if (periodic) {
687                 return std::max(1,int(sqrt(16.0*2.3/expnt)+1));
688             }
689             else {
690                 return 0;
691             }
692         }
693     public:
694         const Q coeff;          ///< Coefficient
695         const double expnt;     ///< Exponent
696         const Level natlev;     ///< Level to evaluate
697         const int m;            ///< Order of derivative (0, 1, or 2 only)
698 
699         explicit GaussianConvolution1D(int k, Q coeff, double expnt,
700         		int m, bool periodic, double arg = 0.0)
701             : Convolution1D<Q>(k,k+11,maxR(periodic,expnt),arg)
702             , coeff(coeff)
703             , expnt(expnt)
704             , natlev(Level(0.5*log(expnt)/log(2.0)+1))
705             , m(m)
706         {
707             MADNESS_ASSERT(m>=0 && m<=2);
708             // std::cout << "GC expnt=" << expnt << " coeff="  << coeff << " natlev=" << natlev << " maxR=" << maxR(periodic,expnt) << std::endl;
709             // for (Level n=0; n<5; n++) {
710             //     for (Translation l=0; l<(1<<n); l++) {
711             //         std::cout << "RNLP " << n << " " << l << " " << this->get_rnlp(n,l).normf() << std::endl;
712             //     }
713             //     std::cout << std::endl;
714             // }
715         }
716 
~GaussianConvolution1D()717         virtual ~GaussianConvolution1D() {}
718 
natural_level()719         virtual Level natural_level() const {
720             return natlev;
721         }
722 
723         /// Compute the projection of the operator onto the double order polynomials
724 
725         /// The returned reference is to a cached tensor ... if you want to
726         /// modify it, take a copy first.
727         ///
728         /// Return in \c v[p] \c p=0..2*k-1
729         /// \code
730         /// r(n,l,p) = 2^(-n) * int(K(2^(-n)*(z+l)) * phi(p,z), z=0..1)
731         /// \endcode
732         /// The kernel is coeff*exp(-expnt*z^2)*z^m (with m>0).  This is equivalent to
733         /// \code
734         /// r(n,l,p) = 2^(-n*(m+1))*coeff * int( ((d/dz)^m exp(-beta*z^2)) * phi(p,z-l), z=l..l+1)
735         /// \endcode
736         /// where
737         /// \code
738         /// beta = alpha * 2^(-2*n)
739         /// \endcode
rnlp(Level n,Translation lx)740         Tensor<Q> rnlp(Level n, Translation lx) const {
741             int twok = 2*this->k;
742             Tensor<Q> v(twok);       // Can optimize this away by passing in
743 
744             Translation lkeep = lx;
745             if (lx<0) lx = -lx-1;
746 
747             /* Apply high-order Gauss Legendre onto subintervals
748 
749                coeff*int(exp(-beta(x+l)**2) * z^m * phi[p](x),x=0..1);
750 
751                The translations internally considered are all +ve, so
752                signficant pieces will be on the left.  Finish after things
753                become insignificant.
754 
755                The resulting coefficients are accurate to about 1e-20.
756             */
757 
758             // Rescale expnt & coeff onto level n so integration range
759             // is [l,l+1]
760             Q scaledcoeff = coeff*pow(0.5,0.5*n*(2*m+1));
761 
762             // Subdivide interval into nbox boxes of length h
763             // ... estimate appropriate size from the exponent.  A
764             // Gaussian with real-part of the exponent beta falls in
765             // magnitude by a factor of 1/e at x=1/sqrt(beta), and by
766             // a factor of e^-49 ~ 5e-22 at x=7/sqrt(beta) (and with
767             // polyn of z^2 it is 1e-20).  So, if we use a box of size
768             // 1/sqrt(beta) we will need at most 7 boxes.  Incorporate
769             // the coefficient into the screening since it may be
770             // large.  We can represent exp(-x^2) over [l,l+1] with a
771             // polynomial of order 21 to a relative
772             // precision of better than machine precision for
773             // l=0,1,2,3 and for l>3 the absolute error is less than
774             // 1e-23.  We want to compute matrix elements with
775             // polynomials of order 2*k-1+m, so the total order is
776             // 2*k+20+m, which can be integrated with a quadrature rule
777             // of npt=k+11+(m+1)/2.  npt is set in the constructor.
778 
779             double fourn = std::pow(4.0,double(n));
780             double beta = expnt * pow(0.25,double(n));
781             double h = 1.0/sqrt(beta);  // 2.0*sqrt(0.5/beta);
782             long nbox = long(1.0/h);
783             if (nbox < 1) nbox = 1;
784             h = 1.0/nbox;
785 
786             // Find argmax such that h*scaledcoeff*exp(-argmax)=1e-22 ... if
787             // beta*xlo*xlo is already greater than argmax we can neglect this
788             // and subsequent boxes.
789 
790             // The derivatives add a factor of expnt^m to the size of
791             // the function at long range.
792             double sch = std::abs(scaledcoeff*h);
793             if (m == 1) sch *= expnt;
794             else if (m == 2) sch *= expnt*expnt;
795             double argmax = std::abs(log(1e-22/sch)); // perhaps should be -log(1e-22/sch) ?
796 
797             for (long box=0; box<nbox; ++box) {
798                 double xlo = box*h + lx;
799                 if (beta*xlo*xlo > argmax) break;
800                 for (long i=0; i<this->npt; ++i) {
801 #ifdef IBMXLC
802                     double phix[80];
803 #else
804                     double phix[twok];
805 #endif
806                     double xx = xlo + h*this->quad_x(i);
807                     Q ee = scaledcoeff*exp(-beta*xx*xx)*this->quad_w(i)*h;
808 
809                     // Differentiate as necessary
810                     if (m == 1) {
811                         ee *= -2.0*expnt*xx;
812                     }
813                     else if (m == 2) {
814                         ee *= (4.0*xx*xx*expnt*expnt - 2.0*expnt*fourn);
815                     }
816 
817                     legendre_scaling_functions(xx-lx,twok,phix);
818                     for (long p=0; p<twok; ++p) v(p) += ee*phix[p];
819                 }
820             }
821 
822             if (lkeep < 0) {
823                 /* phi[p](1-z) = (-1)^p phi[p](z) */
824                 if (m == 1)
825                     for (long p=0; p<twok; ++p) v(p) = -v(p);
826                 for (long p=1; p<twok; p+=2) v(p) = -v(p);
827             }
828 
829             return v;
830         };
831 
832         /// Returns true if the block is expected to be small
issmall(Level n,Translation lx)833         bool issmall(Level n, Translation lx) const {
834             double beta = expnt * pow(0.25,double(n));
835             Translation ll;
836             if (lx > 0)
837                 ll = lx - 1;
838             else if (lx < 0)
839                 ll = -1 - lx;
840             else
841                 ll = 0;
842 
843             return (beta*ll*ll > 49.0);      // 49 -> 5e-22     69 -> 1e-30
844         };
845     };
846 
847 
848     template <typename Q>
849     struct GaussianConvolution1DCache {
850         static ConcurrentHashMap<hashT, std::shared_ptr< GaussianConvolution1D<Q> > > map;
851         typedef typename ConcurrentHashMap<hashT, std::shared_ptr< GaussianConvolution1D<Q> > >::iterator iterator;
852         typedef typename ConcurrentHashMap<hashT, std::shared_ptr< GaussianConvolution1D<Q> > >::datumT datumT;
853 
getGaussianConvolution1DCache854         static std::shared_ptr< GaussianConvolution1D<Q> > get(int k, double expnt, int m, bool periodic) {
855             hashT key = hash_value(expnt);
856             hash_combine(key, k);
857             hash_combine(key, m);
858             hash_combine(key, int(periodic));
859 
860             MADNESS_PRAGMA_CLANG(diagnostic push)
861             MADNESS_PRAGMA_CLANG(diagnostic ignored "-Wundefined-var-template")
862 
863             iterator it = map.find(key);
864             if (it == map.end()) {
865                 map.insert(datumT(key, std::make_shared< GaussianConvolution1D<Q> >(k,
866                                                                                     Q(sqrt(expnt/constants::pi)),
867                                                                                     expnt,
868                                                                                     m,
869                                                                                     periodic
870                                                                                     )));
871                 it = map.find(key);
872                 //printf("conv1d: making  %d %.8e\n",k,expnt);
873             }
874             else {
875                 //printf("conv1d: reusing %d %.8e\n",k,expnt);
876             }
877             return it->second;
878 
879             MADNESS_PRAGMA_CLANG(diagnostic pop)
880 
881         }
882     };
883 }
884 
885 #endif // MADNESS_MRA_CONVOLUTION1D_H__INCLUDED
886