1 /* 2 This file is part of MADNESS. 3 4 Copyright (C) 2007,2010 Oak Ridge National Laboratory 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 20 For more information please contact: 21 22 Robert J. Harrison 23 Oak Ridge National Laboratory 24 One Bethel Valley Road 25 P.O. Box 2008, MS-6367 26 27 email: harrisonrj@ornl.gov 28 tel: 865-241-3937 29 fax: 865-572-0680 30 */ 31 32 #ifndef MADNESS_MRA_CONVOLUTION1D_H__INCLUDED 33 #define MADNESS_MRA_CONVOLUTION1D_H__INCLUDED 34 35 #include <madness/world/vector.h> 36 #include <madness/constants.h> 37 #include <limits.h> 38 #include <madness/tensor/tensor.h> 39 #include <madness/mra/simplecache.h> 40 #include <madness/mra/adquad.h> 41 #include <madness/mra/twoscale.h> 42 #include <madness/tensor/aligned.h> 43 #include <madness/tensor/tensor_lapack.h> 44 #include <algorithm> 45 46 /// \file mra/convolution1d.h 47 /// \brief Compuates most matrix elements over 1D operators (including Gaussians) 48 49 /// \ingroup function 50 51 namespace madness { 52 53 void aligned_add(long n, double* MADNESS_RESTRICT a, const double* MADNESS_RESTRICT b); 54 void aligned_sub(long n, double* MADNESS_RESTRICT a, const double* MADNESS_RESTRICT b); 55 void aligned_add(long n, double_complex* MADNESS_RESTRICT a, const double_complex* MADNESS_RESTRICT b); 56 void aligned_sub(long n, double_complex* MADNESS_RESTRICT a, const double_complex* MADNESS_RESTRICT b); 57 58 template <typename T> copy_2d_patch(T * MADNESS_RESTRICT out,long ldout,const T * MADNESS_RESTRICT in,long ldin,long n,long m)59 static void copy_2d_patch(T* MADNESS_RESTRICT out, long ldout, const T* MADNESS_RESTRICT in, long ldin, long n, long m) { 60 for (long i=0; i<n; ++i, out+=ldout, in+=ldin) { 61 for (long j=0; j<m; ++j) { 62 out[j] = in[j]; 63 } 64 } 65 } 66 67 /// a(n,m) --> b(m,n) ... optimized for smallish matrices 68 template <typename T> fast_transpose(long n,long m,const T * a,T * MADNESS_RESTRICT b)69 inline void fast_transpose(long n, long m, const T* a, T* MADNESS_RESTRICT b) { 70 // n will always be k or 2k (k=wavelet order) and m will be anywhere 71 // from 2^(NDIM-1) to (2k)^(NDIM-1). 72 73 // for (long i=0; i<n; ++i) 74 // for (long j=0; j<m; ++j) 75 // b[j*n+i] = a[i*m+j]; 76 // return; 77 78 if (n==1 || m==1) { 79 long nm=n*m; 80 for (long i=0; i<nm; ++i) b[i] = a[i]; 81 return; 82 } 83 84 long n4 = (n>>2)<<2; 85 long m4 = m<<2; 86 const T* a0 = a; 87 for (long i=0; i<n4; i+=4, a0+=m4) { 88 const T* a1 = a0+m; 89 const T* a2 = a1+m; 90 const T* a3 = a2+m; 91 T* MADNESS_RESTRICT bi = b+i; 92 for (long j=0; j<m; ++j, bi+=n) { 93 T tmp0 = a0[j]; 94 T tmp1 = a1[j]; 95 T tmp2 = a2[j]; 96 T tmp3 = a3[j]; 97 98 bi[0] = tmp0; 99 bi[1] = tmp1; 100 bi[2] = tmp2; 101 bi[3] = tmp3; 102 } 103 } 104 105 for (long i=n4; i<n; ++i) 106 for (long j=0; j<m; ++j) 107 b[j*n+i] = a[i*m+j]; 108 109 } 110 111 // /// a(i,j) --> b(i,j) for i=0..n-1 and j=0..r-1 noting dimensions are a(n,m) and b(n,r). 112 113 // /// returns b 114 // template <typename T> 115 // inline T* shrink(long n, long m, long r, const T* a, T* MADNESS_RESTRICT b) { 116 // T* result = b; 117 // if (r == 0) { 118 // ; 119 // } 120 // else if (r == 1) { 121 // for (long i=0; i<n; ++i) { 122 // b[i] = a[i]; 123 // } 124 // } 125 // else if (r == 2) { 126 // for (long i=0; i<n; ++i, a+=m, b+=2) { 127 // b[0] = a[0]; 128 // b[1] = a[1]; 129 // } 130 // } 131 // else if (r == 3) { 132 // for (long i=0; i<n; ++i, a+=m, b+=3) { 133 // b[0] = a[0]; 134 // b[1] = a[1]; 135 // b[2] = a[2]; 136 // } 137 // } 138 // else if (r == 4) { 139 // for (long i=0; i<n; ++i, a+=m, b+=4) { 140 // b[0] = a[0]; 141 // b[1] = a[1]; 142 // b[2] = a[2]; 143 // b[3] = a[3]; 144 // } 145 // } 146 // else { 147 // for (long i=0; i<n; ++i, a+=m, b+=r) { 148 // for (long j=0; j<r; j++) { 149 // b[j] = a[j]; 150 // } 151 // } 152 // } 153 // return result; 154 // } 155 156 /// actual data for 1 dimension and for 1 term and for 1 displacement for a convolution operator 157 /// here we keep the transformation matrices 158 159 /// !!! Note that if Rnormf is zero then ***ALL*** of the tensors are empty 160 template <typename Q> 161 struct ConvolutionData1D { 162 163 164 Tensor<Q> R, T; ///< if NS: R=ns, T=T part of ns; if modified NS: T=\uparrow r^(n-1) 165 Tensor<Q> RU, RVT, TU, TVT; ///< SVD approximations to R and T 166 Tensor<typename Tensor<Q>::scalar_type> Rs, Ts; ///< hold relative errors, NOT the singular values.. 167 168 // norms for NS form 169 double Rnorm, Tnorm, Rnormf, Tnormf, NSnormf; 170 171 // norms for modified NS form 172 double N_up, N_diff, N_F; ///< the norms according to Beylkin 2008, Eq. (21) ff 173 174 175 /// ctor for NS form 176 /// make the operator matrices r^n and \uparrow r^(n-1) 177 /// @param[in] R operator matrix of the requested level; NS: unfilter(r^(n+1)); modified NS: r^n 178 /// @param[in] T upsampled operator matrix from level n-1; NS: r^n; modified NS: filter( r^(n-1) ) ConvolutionData1DConvolutionData1D179 ConvolutionData1D(const Tensor<Q>& R, const Tensor<Q>& T) : R(R), T(T) { 180 Rnormf = R.normf(); 181 // Making the approximations is expensive ... only do it for 182 // significant components 183 if (Rnormf > 1e-20) { 184 Tnormf = T.normf(); 185 make_approx(T, TU, Ts, TVT, Tnorm); 186 make_approx(R, RU, Rs, RVT, Rnorm); 187 int k = T.dim(0); 188 189 Tensor<Q> NS = copy(R); 190 for (int i=0; i<k; ++i) 191 for (int j=0; j<k; ++j) 192 NS(i,j) = 0.0; 193 NSnormf = NS.normf(); 194 195 } 196 else { 197 Rnorm = Tnorm = Rnormf = Tnormf = NSnormf = 0.0; 198 N_F = N_up = N_diff = 0.0; 199 } 200 } 201 202 /// ctor for modified NS form 203 /// make the operator matrices r^n and \uparrow r^(n-1) 204 /// @param[in] R operator matrix of the requested level; NS: unfilter(r^(n+1)); modified NS: r^n 205 /// @param[in] T upsampled operator matrix from level n-1; NS: r^n; modified NS: filter( r^(n-1) ) 206 /// @param[in] modified use (un) modified NS form ConvolutionData1DConvolutionData1D207 ConvolutionData1D(const Tensor<Q>& R, const Tensor<Q>& T, const bool modified) : R(R), T(T) { 208 209 // note that R can be small, but T still be large 210 211 Rnormf = R.normf(); 212 Tnormf = T.normf(); 213 // Making the approximations is expensive ... only do it for 214 // significant components 215 if (Rnormf > 1e-20) make_approx(R, RU, Rs, RVT, Rnorm); 216 if (Tnormf > 1e-20) make_approx(T, TU, Ts, TVT, Tnorm); 217 218 // norms for modified NS form: follow Beylkin, 2008, Eq. (21) ff 219 N_F=Rnormf; 220 N_up=Tnormf; 221 N_diff=(R-T).normf(); 222 } 223 224 225 226 /// approximate the operator matrices using SVD, and abuse Rs to hold the error instead of 227 /// the singular values (seriously, who named this??) make_approxConvolutionData1D228 void make_approx(const Tensor<Q>& R, 229 Tensor<Q>& RU, Tensor<typename Tensor<Q>::scalar_type>& Rs, Tensor<Q>& RVT, double& norm) { 230 int n = R.dim(0); 231 svd(R, RU, Rs, RVT); 232 for (int i=0; i<n; ++i) { 233 for (int j=0; j<n; ++j) { 234 RVT(i,j) *= Rs[i]; 235 } 236 } 237 for (int i=n-1; i>1; --i) { // Form cumulative sum of norms 238 Rs[i-1] += Rs[i]; 239 } 240 241 norm = Rs[0]; 242 if (Rs[0]>0.0) { // Turn into relative errors 243 double rnorm = 1.0/norm; 244 for (int i=0; i<n; ++i) { 245 Rs[i] *= rnorm; 246 } 247 } 248 } 249 }; 250 251 /// Provides the common functionality/interface of all 1D convolutions 252 253 /// interface for 1 term and for 1 dimension; 254 /// the actual data are kept in ConvolutionData1D 255 /// Derived classes must implement rnlp, issmall, natural_level 256 template <typename Q> 257 class Convolution1D { 258 public: 259 typedef Q opT; ///< The apply function uses this to infer resultT=opT*inputT 260 int k; ///< Wavelet order 261 int npt; ///< Number of quadrature points (is this used?) 262 int maxR; ///< Number of lattice translations for sum 263 Tensor<double> quad_x; 264 Tensor<double> quad_w; 265 Tensor<double> c; 266 Tensor<double> hgT, hg; 267 Tensor<double> hgT2k; 268 double arg; 269 270 mutable SimpleCache<Tensor<Q>, 1> rnlp_cache; 271 mutable SimpleCache<Tensor<Q>, 1> rnlij_cache; 272 mutable SimpleCache<ConvolutionData1D<Q>, 1> ns_cache; 273 mutable SimpleCache<ConvolutionData1D<Q>, 2> mod_ns_cache; 274 ~Convolution1D()275 virtual ~Convolution1D() {}; 276 277 Convolution1D(int k, int npt, int maxR, double arg = 0.0) k(k)278 : k(k) 279 , npt(npt) 280 , maxR(maxR) 281 , quad_x(npt) 282 , quad_w(npt) 283 , arg(arg) 284 { 285 auto success = autoc(k,&c); 286 MADNESS_ASSERT(success); 287 288 gauss_legendre(npt,0.0,1.0,quad_x.ptr(),quad_w.ptr()); 289 success = two_scale_hg(k,&hg); 290 MADNESS_ASSERT(success); 291 hgT = transpose(hg); 292 success = two_scale_hg(2*k,&hgT2k); 293 MADNESS_ASSERT(success); 294 hgT2k = transpose(hgT2k); 295 296 // Cannot construct the coefficients here since the 297 // derived class is not yet constructed so cannot call 298 // (even indirectly) a virtual method 299 } 300 301 /// Compute the projection of the operator onto the double order polynomials 302 virtual Tensor<Q> rnlp(Level n, Translation lx) const = 0; 303 304 /// Returns true if the block of rnlp is expected to be small 305 virtual bool issmall(Level n, Translation lx) const = 0; 306 307 /// Returns true if the block of rnlp is expected to be small including periodicity get_issmall(Level n,Translation lx)308 bool get_issmall(Level n, Translation lx) const { 309 if (maxR == 0) { 310 return issmall(n, lx); 311 } 312 else { 313 Translation twon = Translation(1)<<n; 314 for (int R=-maxR; R<=maxR; ++R) { 315 if (!issmall(n, R*twon+lx)) return false; 316 } 317 return true; 318 } 319 } 320 321 /// Returns the level for projection 322 //virtual Level natural_level() const { 323 // return 13; 324 //} natural_level()325 virtual Level natural_level() const {return 13;} 326 327 /// Computes the transition matrix elements for the convolution for n,l 328 329 /// Returns the tensor 330 /// \code 331 /// r(i,j) = int(K(x-y) phi[n0](x) phi[nl](y), x=0..1, y=0..1) 332 /// \endcode 333 /// This is computed from the matrix elements over the correlation 334 /// function which in turn are computed from the matrix elements 335 /// over the double order legendre polynomials. 336 const Tensor<Q>& rnlij(Level n, Translation lx, bool do_transpose=false) const { 337 const Tensor<Q>* p=rnlij_cache.getptr(n,lx); 338 if (p) return *p; 339 340 // PROFILE_MEMBER_FUNC(Convolution1D); // Too fine grain for routine profiling 341 342 long twok = 2*k; 343 Tensor<Q> R(2*twok); 344 R(Slice(0,twok-1)) = get_rnlp(n,lx-1); 345 R(Slice(twok,2*twok-1)) = get_rnlp(n,lx); 346 347 R.scale(pow(0.5,0.5*n)); 348 R = inner(c,R); 349 if (do_transpose) R = transpose(R); 350 rnlij_cache.set(n,lx,R); 351 return *rnlij_cache.getptr(n,lx); 352 }; 353 354 355 /// Returns a pointer to the cached modified nonstandard form of the operator 356 357 /// @param[in] op_key holds the scale and the source and target translations 358 /// @return a pointer to the cached modified nonstandard form of the operator mod_nonstandard(const Key<2> & op_key)359 const ConvolutionData1D<Q>* mod_nonstandard(const Key<2>& op_key) const { 360 361 const Level& n=op_key.level(); 362 const Translation& sx=op_key.translation()[0]; // source translation 363 const Translation& tx=op_key.translation()[1]; // target translation 364 const Translation lx=tx-sx; // displacement 365 const Translation s_off=sx%2; 366 const Translation t_off=tx%2; 367 368 // we cache translation and source offset 369 const Key<2> cache_key(n, Vector<Translation,2>{lx, s_off} ); 370 const ConvolutionData1D<Q>* p = mod_ns_cache.getptr(cache_key); 371 if (p) return p; 372 373 // for paranoid me 374 MADNESS_ASSERT(sx>=0 and tx>=0); 375 376 377 Tensor<Q> R, T, Rm; 378 // if (!get_issmall(n, lx)) { 379 // print("no issmall", lx, source, n); 380 381 const Translation lx_half = tx/2 - sx/2; 382 const Slice s0(0,k-1), s1(k,2*k-1); 383 // print("sx, tx",lx,lx_half,sx, tx,"off",s_off,t_off); 384 385 // this is the operator matrix in its actual level 386 R = rnlij(n,lx); 387 388 // this is the upsampled operator matrix 389 Rm = Tensor<Q>(2*k,2*k); 390 if (n>0) Rm(s0,s0)=rnlij(n-1,lx_half); 391 { 392 // PROFILE_BLOCK(Convolution1D_nstran); // Too fine grain for routine profiling 393 Rm = transform(Rm,hg); 394 } 395 396 { 397 // PROFILE_BLOCK(Convolution1D_nscopy); // Too fine grain for routine profiling 398 T=Tensor<Q>(k,k); 399 if (t_off==0 and s_off==0) T=copy(Rm(s0,s0)); 400 if (t_off==0 and s_off==1) T=copy(Rm(s0,s1)); 401 if (t_off==1 and s_off==0) T=copy(Rm(s1,s0)); 402 if (t_off==1 and s_off==1) T=copy(Rm(s1,s1)); 403 // if (t_off==0 and s_off==0) T=copy(Rm(s0,s0)); 404 // if (t_off==1 and s_off==0) T=copy(Rm(s0,s1)); 405 // if (t_off==0 and s_off==1) T=copy(Rm(s1,s0)); 406 // if (t_off==1 and s_off==1) T=copy(Rm(s1,s1)); 407 } 408 409 { 410 // PROFILE_BLOCK(Convolution1D_trans); // Too fine grain for routine profiling 411 412 Tensor<Q> RT(k,k), TT(k,k); 413 fast_transpose(k,k,R.ptr(), RT.ptr()); 414 fast_transpose(k,k,T.ptr(), TT.ptr()); 415 R = RT; 416 T = TT; 417 } 418 419 // } 420 421 mod_ns_cache.set(cache_key,ConvolutionData1D<Q>(R,T,true)); 422 return mod_ns_cache.getptr(cache_key); 423 } 424 425 /// Returns a pointer to the cached nonstandard form of the operator nonstandard(Level n,Translation lx)426 const ConvolutionData1D<Q>* nonstandard(Level n, Translation lx) const { 427 const ConvolutionData1D<Q>* p = ns_cache.getptr(n,lx); 428 if (p) return p; 429 430 // PROFILE_MEMBER_FUNC(Convolution1D); // Too fine grain for routine profiling 431 432 Tensor<Q> R, T; 433 if (!get_issmall(n, lx)) { 434 Translation lx2 = lx*2; 435 #if 0 // UNUSED VARIABLES 436 Slice s0(0,k-1), s1(k,2*k-1); 437 #endif 438 const Tensor<Q> r0 = rnlij(n+1,lx2); 439 const Tensor<Q> rp = rnlij(n+1,lx2+1); 440 const Tensor<Q> rm = rnlij(n+1,lx2-1); 441 442 R = Tensor<Q>(2*k,2*k); 443 444 // R(s0,s0) = r0; 445 // R(s1,s1) = r0; 446 // R(s1,s0) = rp; 447 // R(s0,s1) = rm; 448 449 { 450 // PROFILE_BLOCK(Convolution1D_nscopy); // Too fine grain for routine profiling 451 copy_2d_patch(R.ptr(), 2*k, r0.ptr(), k, k, k); 452 copy_2d_patch(R.ptr()+2*k*k + k, 2*k, r0.ptr(), k, k, k); 453 copy_2d_patch(R.ptr()+2*k*k, 2*k, rp.ptr(), k, k, k); 454 copy_2d_patch(R.ptr() + k, 2*k, rm.ptr(), k, k, k); 455 } 456 457 //print("R ", n, lx, R.normf(), r0.normf(), rp.normf(), rm.normf()); 458 459 460 { 461 // PROFILE_BLOCK(Convolution1D_nstran); // Too fine grain for routine profiling 462 R = transform(R,hgT); 463 } 464 465 //print("RX", n, lx, R.normf(), r0.normf(), rp.normf(), rm.normf()); 466 467 { 468 // PROFILE_BLOCK(Convolution1D_trans); // Too fine grain for routine profiling 469 470 Tensor<Q> RT(2*k,2*k); 471 fast_transpose(2*k, 2*k, R.ptr(), RT.ptr()); 472 R = RT; 473 474 //print("RT", n, lx, R.normf(), r0.normf(), rp.normf(), rm.normf()); 475 476 //T = copy(R(s0,s0)); 477 T = Tensor<Q>(k,k); 478 copy_2d_patch(T.ptr(), k, R.ptr(), 2*k, k, k); 479 } 480 481 //print("NS", n, lx, R.normf(), T.normf()); 482 } 483 484 ns_cache.set(n,lx,ConvolutionData1D<Q>(R,T)); 485 486 return ns_cache.getptr(n,lx); 487 }; 488 phase(double R)489 Q phase(double R) const { 490 return 1.0; 491 } 492 phase(double_complex R)493 Q phase(double_complex R) const { 494 return exp(double_complex(0.0,arg)*R); 495 } 496 497 get_rnlp(Level n,Translation lx)498 const Tensor<Q>& get_rnlp(Level n, Translation lx) const { 499 const Tensor<Q>* p=rnlp_cache.getptr(n,lx); 500 if (p) return *p; 501 502 // PROFILE_MEMBER_FUNC(Convolution1D); // Too fine grain for routine profiling 503 504 long twok = 2*k; 505 Tensor<Q> r; 506 507 if (get_issmall(n, lx)) { 508 r = Tensor<Q>(twok); 509 } 510 else if (n < natural_level()) { 511 Tensor<Q> R(2*twok); 512 R(Slice(0,twok-1)) = get_rnlp(n+1,2*lx); 513 R(Slice(twok,2*twok-1)) = get_rnlp(n+1,2*lx+1); 514 515 R = transform(R, hgT2k); 516 r = copy(R(Slice(0,twok-1))); 517 } 518 else { 519 // PROFILE_BLOCK(Convolution1Drnlp); // Too fine grain for routine profiling 520 521 if (maxR > 0) { 522 Translation twon = Translation(1)<<n; 523 r = Tensor<Q>(2*k); 524 for (int R=-maxR; R<=maxR; ++R) { 525 r.gaxpy(1.0, rnlp(n,R*twon+lx), phase(Q(R))); 526 } 527 } 528 else { 529 r = rnlp(n, lx); 530 } 531 } 532 533 rnlp_cache.set(n, lx, r); 534 //print(" SET rnlp", n, lx, r); 535 return *rnlp_cache.getptr(n,lx); 536 } 537 }; 538 539 /// Array of 1D convolutions (one / dimension) 540 541 /// data for 1 term and all dimensions 542 template <typename Q, int NDIM> 543 class ConvolutionND { 544 std::array<std::shared_ptr<Convolution1D<Q> >, NDIM> ops; 545 Q fac; 546 547 public: ConvolutionND()548 ConvolutionND() : fac(1.0) {} 549 ConvolutionND(const ConvolutionND & other)550 ConvolutionND(const ConvolutionND& other) : fac(other.fac) 551 { 552 ops = other.ops; 553 } 554 fac(fac)555 ConvolutionND(std::shared_ptr<Convolution1D<Q> > op, Q fac=1.0) : fac(fac) 556 { 557 std::fill(ops.begin(), ops.end(), op); 558 } 559 setop(int dim,const std::shared_ptr<Convolution1D<Q>> & op)560 void setop(int dim, const std::shared_ptr<Convolution1D<Q> >& op) { 561 ops[dim] = op; 562 } 563 getop(int dim)564 std::shared_ptr<Convolution1D<Q> > getop(int dim) const { 565 return ops[dim]; 566 } 567 setfac(Q value)568 void setfac(Q value) { 569 fac = value; 570 } 571 getfac()572 Q getfac() const { 573 return fac; 574 } 575 }; 576 577 // To test generic convolution by comparing with GaussianConvolution1D 578 template <typename Q> 579 class GaussianGenericFunctor { 580 private: 581 Q coeff; 582 double exponent; 583 int m; 584 Level natlev; 585 586 public: 587 // coeff * exp(-exponent*x^2) * x^m 588 GaussianGenericFunctor(Q coeff, double exponent, int m=0) coeff(coeff)589 : coeff(coeff), exponent(exponent), m(m), 590 natlev(Level(0.5*log(exponent)/log(2.0)+1)) {} 591 operator()592 Q operator()(double x) const { 593 Q ee = coeff*exp(-exponent*x*x); 594 for (int mm=0; mm<m; ++mm) ee *= x; 595 return ee; 596 } natural_level()597 Level natural_level() const {return natlev;} 598 }; 599 600 601 /// Generic 1D convolution using brute force (i.e., slow) adaptive quadrature for rnlp 602 603 /// Calls op(x) with x in *simulation coordinates* to evaluate the function. 604 template <typename Q, typename opT> 605 class GenericConvolution1D : public Convolution1D<Q> { 606 private: 607 opT op; 608 long maxl; ///< At natural level is l beyond which operator is zero 609 public: 610 GenericConvolution1D()611 GenericConvolution1D() {} 612 613 GenericConvolution1D(int k, const opT& op, int maxR, double arg = 0.0) 614 : Convolution1D<Q>(k, 20, maxR, arg), op(op), maxl(LONG_MAX-1) { 615 // PROFILE_MEMBER_FUNC(GenericConvolution1D); // Too fine grain for routine profiling 616 617 // For efficiency carefully compute outwards at the "natural" level 618 // until several successive boxes are determined to be zero. This 619 // then defines the future range of the operator and also serves 620 // to precompute the values used in the rnlp cache. 621 622 Level natl = natural_level(); 623 int nzero = 0; 624 for (Translation lx=0; lx<(1L<<natl); ++lx) { 625 const Tensor<Q>& rp = this->get_rnlp(natl, lx); 626 const Tensor<Q>& rm = this->get_rnlp(natl,-lx); 627 if (rp.normf()<1e-12 && rm.normf()<1e-12) ++nzero; 628 if (nzero == 3) { 629 maxl = lx-2; 630 break; 631 } 632 } 633 } 634 natural_level()635 virtual Level natural_level() const {return op.natural_level();} 636 637 struct Shmoo { 638 typedef Tensor<Q> returnT; 639 Level n; 640 Translation lx; 641 const GenericConvolution1D<Q,opT>& q; 642 ShmooShmoo643 Shmoo(Level n, Translation lx, const GenericConvolution1D<Q,opT>* q) 644 : n(n), lx(lx), q(*q) {} 645 operatorShmoo646 returnT operator()(double x) const { 647 int twok = q.k*2; 648 double fac = std::pow(0.5,n); 649 double phix[twok]; 650 legendre_scaling_functions(x-lx,twok,phix); 651 Q f = q.op(fac*x)*sqrt(fac); 652 returnT v(twok); 653 for (long p=0; p<twok; ++p) v(p) += f*phix[p]; 654 return v; 655 } 656 }; 657 rnlp(Level n,Translation lx)658 Tensor<Q> rnlp(Level n, Translation lx) const { 659 return adq1(lx, lx+1, Shmoo(n, lx, this), 1e-12, 660 this->npt, this->quad_x.ptr(), this->quad_w.ptr(), 0); 661 } 662 issmall(Level n,Translation lx)663 bool issmall(Level n, Translation lx) const { 664 if (lx < 0) lx = 1 - lx; 665 // Always compute contributions to nearest neighbor coupling 666 // ... we are two levels below so 0,1 --> 0,1,2,3 --> 0,...,7 667 if (lx <= 7) return false; 668 669 n = natural_level()-n; 670 if (n >= 0) lx = lx << n; 671 else lx = lx >> n; 672 673 return lx >= maxl; 674 } 675 }; 676 677 678 /// 1D convolution with (derivative) Gaussian; coeff and expnt given in *simulation* coordinates [0,1] 679 680 /// Note that the derivative is computed in *simulation* coordinates so 681 /// you must be careful to scale the coefficients correctly. 682 template <typename Q> 683 class GaussianConvolution1D : public Convolution1D<Q> { 684 // Returns range of Gaussian for periodic lattice sum in simulation coords maxR(bool periodic,double expnt)685 static int maxR(bool periodic, double expnt) { 686 if (periodic) { 687 return std::max(1,int(sqrt(16.0*2.3/expnt)+1)); 688 } 689 else { 690 return 0; 691 } 692 } 693 public: 694 const Q coeff; ///< Coefficient 695 const double expnt; ///< Exponent 696 const Level natlev; ///< Level to evaluate 697 const int m; ///< Order of derivative (0, 1, or 2 only) 698 699 explicit GaussianConvolution1D(int k, Q coeff, double expnt, 700 int m, bool periodic, double arg = 0.0) 701 : Convolution1D<Q>(k,k+11,maxR(periodic,expnt),arg) 702 , coeff(coeff) 703 , expnt(expnt) 704 , natlev(Level(0.5*log(expnt)/log(2.0)+1)) 705 , m(m) 706 { 707 MADNESS_ASSERT(m>=0 && m<=2); 708 // std::cout << "GC expnt=" << expnt << " coeff=" << coeff << " natlev=" << natlev << " maxR=" << maxR(periodic,expnt) << std::endl; 709 // for (Level n=0; n<5; n++) { 710 // for (Translation l=0; l<(1<<n); l++) { 711 // std::cout << "RNLP " << n << " " << l << " " << this->get_rnlp(n,l).normf() << std::endl; 712 // } 713 // std::cout << std::endl; 714 // } 715 } 716 ~GaussianConvolution1D()717 virtual ~GaussianConvolution1D() {} 718 natural_level()719 virtual Level natural_level() const { 720 return natlev; 721 } 722 723 /// Compute the projection of the operator onto the double order polynomials 724 725 /// The returned reference is to a cached tensor ... if you want to 726 /// modify it, take a copy first. 727 /// 728 /// Return in \c v[p] \c p=0..2*k-1 729 /// \code 730 /// r(n,l,p) = 2^(-n) * int(K(2^(-n)*(z+l)) * phi(p,z), z=0..1) 731 /// \endcode 732 /// The kernel is coeff*exp(-expnt*z^2)*z^m (with m>0). This is equivalent to 733 /// \code 734 /// r(n,l,p) = 2^(-n*(m+1))*coeff * int( ((d/dz)^m exp(-beta*z^2)) * phi(p,z-l), z=l..l+1) 735 /// \endcode 736 /// where 737 /// \code 738 /// beta = alpha * 2^(-2*n) 739 /// \endcode rnlp(Level n,Translation lx)740 Tensor<Q> rnlp(Level n, Translation lx) const { 741 int twok = 2*this->k; 742 Tensor<Q> v(twok); // Can optimize this away by passing in 743 744 Translation lkeep = lx; 745 if (lx<0) lx = -lx-1; 746 747 /* Apply high-order Gauss Legendre onto subintervals 748 749 coeff*int(exp(-beta(x+l)**2) * z^m * phi[p](x),x=0..1); 750 751 The translations internally considered are all +ve, so 752 signficant pieces will be on the left. Finish after things 753 become insignificant. 754 755 The resulting coefficients are accurate to about 1e-20. 756 */ 757 758 // Rescale expnt & coeff onto level n so integration range 759 // is [l,l+1] 760 Q scaledcoeff = coeff*pow(0.5,0.5*n*(2*m+1)); 761 762 // Subdivide interval into nbox boxes of length h 763 // ... estimate appropriate size from the exponent. A 764 // Gaussian with real-part of the exponent beta falls in 765 // magnitude by a factor of 1/e at x=1/sqrt(beta), and by 766 // a factor of e^-49 ~ 5e-22 at x=7/sqrt(beta) (and with 767 // polyn of z^2 it is 1e-20). So, if we use a box of size 768 // 1/sqrt(beta) we will need at most 7 boxes. Incorporate 769 // the coefficient into the screening since it may be 770 // large. We can represent exp(-x^2) over [l,l+1] with a 771 // polynomial of order 21 to a relative 772 // precision of better than machine precision for 773 // l=0,1,2,3 and for l>3 the absolute error is less than 774 // 1e-23. We want to compute matrix elements with 775 // polynomials of order 2*k-1+m, so the total order is 776 // 2*k+20+m, which can be integrated with a quadrature rule 777 // of npt=k+11+(m+1)/2. npt is set in the constructor. 778 779 double fourn = std::pow(4.0,double(n)); 780 double beta = expnt * pow(0.25,double(n)); 781 double h = 1.0/sqrt(beta); // 2.0*sqrt(0.5/beta); 782 long nbox = long(1.0/h); 783 if (nbox < 1) nbox = 1; 784 h = 1.0/nbox; 785 786 // Find argmax such that h*scaledcoeff*exp(-argmax)=1e-22 ... if 787 // beta*xlo*xlo is already greater than argmax we can neglect this 788 // and subsequent boxes. 789 790 // The derivatives add a factor of expnt^m to the size of 791 // the function at long range. 792 double sch = std::abs(scaledcoeff*h); 793 if (m == 1) sch *= expnt; 794 else if (m == 2) sch *= expnt*expnt; 795 double argmax = std::abs(log(1e-22/sch)); // perhaps should be -log(1e-22/sch) ? 796 797 for (long box=0; box<nbox; ++box) { 798 double xlo = box*h + lx; 799 if (beta*xlo*xlo > argmax) break; 800 for (long i=0; i<this->npt; ++i) { 801 #ifdef IBMXLC 802 double phix[80]; 803 #else 804 double phix[twok]; 805 #endif 806 double xx = xlo + h*this->quad_x(i); 807 Q ee = scaledcoeff*exp(-beta*xx*xx)*this->quad_w(i)*h; 808 809 // Differentiate as necessary 810 if (m == 1) { 811 ee *= -2.0*expnt*xx; 812 } 813 else if (m == 2) { 814 ee *= (4.0*xx*xx*expnt*expnt - 2.0*expnt*fourn); 815 } 816 817 legendre_scaling_functions(xx-lx,twok,phix); 818 for (long p=0; p<twok; ++p) v(p) += ee*phix[p]; 819 } 820 } 821 822 if (lkeep < 0) { 823 /* phi[p](1-z) = (-1)^p phi[p](z) */ 824 if (m == 1) 825 for (long p=0; p<twok; ++p) v(p) = -v(p); 826 for (long p=1; p<twok; p+=2) v(p) = -v(p); 827 } 828 829 return v; 830 }; 831 832 /// Returns true if the block is expected to be small issmall(Level n,Translation lx)833 bool issmall(Level n, Translation lx) const { 834 double beta = expnt * pow(0.25,double(n)); 835 Translation ll; 836 if (lx > 0) 837 ll = lx - 1; 838 else if (lx < 0) 839 ll = -1 - lx; 840 else 841 ll = 0; 842 843 return (beta*ll*ll > 49.0); // 49 -> 5e-22 69 -> 1e-30 844 }; 845 }; 846 847 848 template <typename Q> 849 struct GaussianConvolution1DCache { 850 static ConcurrentHashMap<hashT, std::shared_ptr< GaussianConvolution1D<Q> > > map; 851 typedef typename ConcurrentHashMap<hashT, std::shared_ptr< GaussianConvolution1D<Q> > >::iterator iterator; 852 typedef typename ConcurrentHashMap<hashT, std::shared_ptr< GaussianConvolution1D<Q> > >::datumT datumT; 853 getGaussianConvolution1DCache854 static std::shared_ptr< GaussianConvolution1D<Q> > get(int k, double expnt, int m, bool periodic) { 855 hashT key = hash_value(expnt); 856 hash_combine(key, k); 857 hash_combine(key, m); 858 hash_combine(key, int(periodic)); 859 860 MADNESS_PRAGMA_CLANG(diagnostic push) 861 MADNESS_PRAGMA_CLANG(diagnostic ignored "-Wundefined-var-template") 862 863 iterator it = map.find(key); 864 if (it == map.end()) { 865 map.insert(datumT(key, std::make_shared< GaussianConvolution1D<Q> >(k, 866 Q(sqrt(expnt/constants::pi)), 867 expnt, 868 m, 869 periodic 870 ))); 871 it = map.find(key); 872 //printf("conv1d: making %d %.8e\n",k,expnt); 873 } 874 else { 875 //printf("conv1d: reusing %d %.8e\n",k,expnt); 876 } 877 return it->second; 878 879 MADNESS_PRAGMA_CLANG(diagnostic pop) 880 881 } 882 }; 883 } 884 885 #endif // MADNESS_MRA_CONVOLUTION1D_H__INCLUDED 886