1 /* 2 This file is part of MADNESS. 3 4 Copyright (C) 2007,2010 Oak Ridge National Laboratory 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 20 For more information please contact: 21 22 Robert J. Harrison 23 Oak Ridge National Laboratory 24 One Bethel Valley Road 25 P.O. Box 2008, MS-6367 26 27 email: harrisonrj@ornl.gov 28 tel: 865-241-3937 29 fax: 865-572-0680 30 31 $Id$ 32 */ 33 #ifndef MADNESS_MRA_VMRA_H__INCLUDED 34 #define MADNESS_MRA_VMRA_H__INCLUDED 35 36 /*! 37 \file vmra.h 38 \brief Defines operations on vectors of Functions 39 \ingroup mra 40 41 This file defines a number of operations on vectors of functions. 42 Assume v is a vector of NDIM-D functions of a certain type. 43 44 45 Operations on array of functions 46 47 *) copying: deep copying of vectors of functions to vector of functions 48 \code 49 vector2 = copy(world, vector1,fence); 50 \endcode 51 52 *) compress: convert multiwavelet representation to legendre representation 53 \code 54 compress(world, vector, fence); 55 \endcode 56 57 *) reconstruct: convert representation to multiwavelets 58 \code 59 reconstruct(world, vector, fence); 60 \endcode 61 62 *) nonstandard: convert to non-standard form 63 \code 64 nonstandard(world, v, fence); 65 \endcode 66 67 *) standard: convert to standard form 68 \code 69 standard(world, v, fence); 70 \endcode 71 72 *) truncate: truncating vectors of functions to desired precision 73 \code 74 truncate(world, v, tolerance, fence); 75 \endcode 76 77 78 *) zero function: create a vector of zero functions of length n 79 \code 80 v=zero(world, n); 81 \endcode 82 83 *) transform: transform a representation from one basis to another 84 \code 85 transform(world, vector, tensor, tolerance, fence ) 86 \endcode 87 88 Setting thresh-hold for precision 89 90 *) set_thresh: setting a finite thresh-hold for a vector of functions 91 \code 92 void set_thresh(World& world, std::vector< Function<T,NDIM> >& v, double thresh, bool fence=true); 93 \endcode 94 95 Arithmetic Operations on arrays of functions 96 97 *) conjugation: conjugate a vector of complex functions 98 99 *) add 100 *) sub 101 *) mul 102 - mul_sparse 103 *) square 104 *) gaxpy 105 *) apply 106 107 Norms, inner-products, blas-1 like operations on vectors of functions 108 109 *) inner 110 *) matrix_inner 111 *) norm_tree 112 *) normalize 113 *) norm2 114 - norm2s 115 *) scale(world, v, alpha); 116 117 118 119 120 */ 121 122 #include <madness/mra/mra.h> 123 #include <madness/mra/derivative.h> 124 #include <madness/tensor/distributed_matrix.h> 125 #include <cstdio> 126 127 namespace madness { 128 129 130 131 /// Compress a vector of functions 132 template <typename T, std::size_t NDIM> 133 void compress(World& world, 134 const std::vector< Function<T,NDIM> >& v, 135 bool fence=true) { 136 137 PROFILE_BLOCK(Vcompress); 138 bool must_fence = false; 139 for (unsigned int i=0; i<v.size(); ++i) { 140 if (!v[i].is_compressed()) { 141 v[i].compress(false); 142 must_fence = true; 143 } 144 } 145 146 if (fence && must_fence) world.gop.fence(); 147 } 148 149 150 /// Reconstruct a vector of functions 151 template <typename T, std::size_t NDIM> 152 void reconstruct(World& world, 153 const std::vector< Function<T,NDIM> >& v, 154 bool fence=true) { 155 PROFILE_BLOCK(Vreconstruct); 156 bool must_fence = false; 157 for (unsigned int i=0; i<v.size(); ++i) { 158 if (v[i].is_compressed()) { 159 v[i].reconstruct(false); 160 must_fence = true; 161 } 162 } 163 164 if (fence && must_fence) world.gop.fence(); 165 } 166 167 /// refine the functions according to the autorefine criteria 168 template <typename T, std::size_t NDIM> 169 void refine(World& world, const std::vector<Function<T,NDIM> >& vf, 170 bool fence=true) { 171 for (const auto& f : vf) f.refine(false); 172 if (fence) world.gop.fence(); 173 } 174 175 /// refine all functions to a common (finest) level 176 177 /// if functions are not initialized (impl==NULL) they are ignored 178 template <typename T, std::size_t NDIM> 179 void refine_to_common_level(World& world, std::vector<Function<T,NDIM> >& vf, 180 bool fence=true) { 181 182 reconstruct(world,vf); 183 Key<NDIM> key0(0, Vector<Translation, NDIM> (0)); 184 std::vector<FunctionImpl<T,NDIM>*> v_ptr; 185 186 // push initialized function pointers into the vector v_ptr 187 for (unsigned int i=0; i<vf.size(); ++i) { 188 if (vf[i].is_initialized()) v_ptr.push_back(vf[i].get_impl().get()); 189 } 190 191 // sort and remove duplicates to not confuse the refining function 192 std::sort(v_ptr.begin(),v_ptr.end()); 193 typename std::vector<FunctionImpl<T, NDIM>*>::iterator it; 194 it = std::unique(v_ptr.begin(), v_ptr.end()); 195 v_ptr.resize( std::distance(v_ptr.begin(),it) ); 196 197 std::vector< Tensor<T> > c(v_ptr.size()); 198 v_ptr[0]->refine_to_common_level(v_ptr, c, key0); 199 if (fence) v_ptr[0]->world.gop.fence(); 200 if (VERIFY_TREE) 201 for (unsigned int i=0; i<vf.size(); i++) vf[i].verify_tree(); 202 } 203 204 /// Generates non-standard form of a vector of functions 205 template <typename T, std::size_t NDIM> 206 void nonstandard(World& world, 207 std::vector< Function<T,NDIM> >& v, 208 bool fence=true) { 209 PROFILE_BLOCK(Vnonstandard); 210 reconstruct(world, v); 211 for (unsigned int i=0; i<v.size(); ++i) { 212 v[i].nonstandard(false,false); 213 } 214 if (fence) world.gop.fence(); 215 } 216 217 218 /// Generates standard form of a vector of functions 219 template <typename T, std::size_t NDIM> 220 void standard(World& world, 221 std::vector< Function<T,NDIM> >& v, 222 bool fence=true) { 223 PROFILE_BLOCK(Vstandard); 224 for (unsigned int i=0; i<v.size(); ++i) { 225 v[i].standard(false); 226 } 227 if (fence) world.gop.fence(); 228 } 229 230 231 /// Truncates a vector of functions 232 template <typename T, std::size_t NDIM> 233 void truncate(World& world, 234 std::vector< Function<T,NDIM> >& v, 235 double tol=0.0, 236 bool fence=true) { 237 PROFILE_BLOCK(Vtruncate); 238 239 compress(world, v); 240 241 for (unsigned int i=0; i<v.size(); ++i) { 242 v[i].truncate(tol, false); 243 } 244 245 if (fence) world.gop.fence(); 246 } 247 248 /// Truncates a vector of functions 249 250 /// @return the truncated vector for chaining 251 template <typename T, std::size_t NDIM> 252 std::vector< Function<T,NDIM> > truncate(std::vector< Function<T,NDIM> > v, 253 double tol=0.0, bool fence=true) { 254 if (v.size()>0) truncate(v[0].world(),v,tol,fence); 255 return v; 256 } 257 258 259 /// Applies a derivative operator to a vector of functions 260 template <typename T, std::size_t NDIM> 261 std::vector< Function<T,NDIM> > 262 apply(World& world, 263 const Derivative<T,NDIM>& D, 264 const std::vector< Function<T,NDIM> >& v, 265 bool fence=true) 266 { 267 reconstruct(world, v); 268 std::vector< Function<T,NDIM> > df(v.size()); 269 for (unsigned int i=0; i<v.size(); ++i) { 270 df[i] = D(v[i],false); 271 } 272 if (fence) world.gop.fence(); 273 return df; 274 } 275 276 /// Generates a vector of zero functions (reconstructed) 277 template <typename T, std::size_t NDIM> 278 std::vector< Function<T,NDIM> > 279 zero_functions(World& world, int n, bool fence=true) { 280 PROFILE_BLOCK(Vzero_functions); 281 std::vector< Function<T,NDIM> > r(n); 282 for (int i=0; i<n; ++i) 283 r[i] = Function<T,NDIM>(FunctionFactory<T,NDIM>(world).fence(false)); 284 285 if (n && fence) world.gop.fence(); 286 287 return r; 288 } 289 290 /// Generates a vector of zero functions (compressed) 291 template <typename T, std::size_t NDIM> 292 std::vector< Function<T,NDIM> > 293 zero_functions_compressed(World& world, int n, bool fence=true) { 294 PROFILE_BLOCK(Vzero_functions); 295 std::vector< Function<T,NDIM> > r(n); 296 for (int i=0; i<n; ++i) 297 r[i] = Function<T,NDIM>(FunctionFactory<T,NDIM>(world).fence(false).compressed(true).initial_level(1)); 298 299 if (n && fence) world.gop.fence(); 300 301 return r; 302 } 303 304 305 /// Transforms a vector of functions according to new[i] = sum[j] old[j]*c[j,i] 306 307 /// Uses sparsity in the transformation matrix --- set small elements to 308 /// zero to take advantage of this. 309 template <typename T, typename R, std::size_t NDIM> 310 std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > 311 transform(World& world, 312 const std::vector< Function<T,NDIM> >& v, 313 const Tensor<R>& c, 314 bool fence=true) { 315 316 PROFILE_BLOCK(Vtransformsp); 317 typedef TENSOR_RESULT_TYPE(T,R) resultT; 318 int n = v.size(); // n is the old dimension 319 int m = c.dim(1); // m is the new dimension 320 MADNESS_ASSERT(n==c.dim(0)); 321 322 std::vector< Function<resultT,NDIM> > vc = zero_functions_compressed<resultT,NDIM>(world, m); 323 compress(world, v); 324 325 for (int i=0; i<m; ++i) { 326 for (int j=0; j<n; ++j) { 327 if (c(j,i) != R(0.0)) vc[i].gaxpy(1.0,v[j],c(j,i),false); 328 } 329 } 330 331 if (fence) world.gop.fence(); 332 return vc; 333 } 334 335 336 template <typename L, typename R, std::size_t NDIM> 337 std::vector< Function<TENSOR_RESULT_TYPE(L,R),NDIM> > transform(World & world,const std::vector<Function<L,NDIM>> & v,const Tensor<R> & c,double tol,bool fence)338 transform(World& world, const std::vector< Function<L,NDIM> >& v, 339 const Tensor<R>& c, double tol, bool fence) { 340 PROFILE_BLOCK(Vtransform); 341 MADNESS_ASSERT(v.size() == (unsigned int)(c.dim(0))); 342 343 std::vector< Function<TENSOR_RESULT_TYPE(L,R),NDIM> > vresult 344 = zero_functions_compressed<TENSOR_RESULT_TYPE(L,R),NDIM>(world, c.dim(1)); 345 346 compress(world, v, true); 347 vresult[0].vtransform(v, c, vresult, tol, fence); 348 return vresult; 349 } 350 351 template <typename T, typename R, std::size_t NDIM> 352 std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > 353 transform(World& world, 354 const std::vector< Function<T,NDIM> >& v, 355 const DistributedMatrix<R>& c, 356 bool fence=true) { 357 PROFILE_FUNC; 358 359 typedef TENSOR_RESULT_TYPE(T,R) resultT; 360 long n = v.size(); // n is the old dimension 361 long m = c.rowdim(); // m is the new dimension 362 MADNESS_ASSERT(n==c.coldim()); 363 364 // new(i) = sum(j) old(j) c(j,i) 365 366 Tensor<T> tmp(n,m); 367 c.copy_to_replicated(tmp); // for debugging 368 tmp = transpose(tmp); 369 370 std::vector< Function<resultT,NDIM> > vc = zero_functions_compressed<resultT,NDIM>(world, m); 371 compress(world, v); 372 373 for (int i=0; i<m; ++i) { 374 for (int j=0; j<n; ++j) { 375 if (tmp(j,i) != R(0.0)) vc[i].gaxpy(1.0,v[j],tmp(j,i),false); 376 } 377 } 378 379 if (fence) world.gop.fence(); 380 return vc; 381 } 382 383 384 /// Scales inplace a vector of functions by distinct values 385 template <typename T, typename Q, std::size_t NDIM> 386 void scale(World& world, 387 std::vector< Function<T,NDIM> >& v, 388 const std::vector<Q>& factors, 389 bool fence=true) { 390 PROFILE_BLOCK(Vscale); 391 for (unsigned int i=0; i<v.size(); ++i) v[i].scale(factors[i],false); 392 if (fence) world.gop.fence(); 393 } 394 395 /// Scales inplace a vector of functions by the same 396 template <typename T, typename Q, std::size_t NDIM> 397 void scale(World& world, 398 std::vector< Function<T,NDIM> >& v, 399 const Q factor, 400 bool fence=true) { 401 PROFILE_BLOCK(Vscale); 402 for (unsigned int i=0; i<v.size(); ++i) v[i].scale(factor,false); 403 if (fence) world.gop.fence(); 404 } 405 406 /// Computes the 2-norms of a vector of functions 407 template <typename T, std::size_t NDIM> norm2s(World & world,const std::vector<Function<T,NDIM>> & v)408 std::vector<double> norm2s(World& world, 409 const std::vector< Function<T,NDIM> >& v) { 410 PROFILE_BLOCK(Vnorm2); 411 std::vector<double> norms(v.size()); 412 for (unsigned int i=0; i<v.size(); ++i) norms[i] = v[i].norm2sq_local(); 413 world.gop.sum(&norms[0], norms.size()); 414 for (unsigned int i=0; i<v.size(); ++i) norms[i] = sqrt(norms[i]); 415 world.gop.fence(); 416 return norms; 417 } 418 419 /// Computes the 2-norm of a vector of functions 420 template <typename T, std::size_t NDIM> norm2(World & world,const std::vector<Function<T,NDIM>> & v)421 double norm2(World& world, 422 const std::vector< Function<T,NDIM> >& v) { 423 PROFILE_BLOCK(Vnorm2); 424 std::vector<double> norms(v.size()); 425 for (unsigned int i=0; i<v.size(); ++i) norms[i] = v[i].norm2sq_local(); 426 world.gop.sum(&norms[0], norms.size()); 427 for (unsigned int i=1; i<v.size(); ++i) norms[0] += norms[i]; 428 world.gop.fence(); 429 return sqrt(norms[0]); 430 } 431 conj(double x)432 inline double conj(double x) { 433 return x; 434 } 435 conj(float x)436 inline double conj(float x) { 437 return x; 438 } 439 440 // !!! FIXME: this task is broken because FunctionImpl::inner_local forces a 441 // future on return from WorldTaskQueue::reduce, which will causes a deadlock if 442 // run inside a task. This behavior must be changed before this task can be used 443 // again. 444 // 445 // template <typename T, typename R, std::size_t NDIM> 446 // struct MatrixInnerTask : public TaskInterface { 447 // Tensor<TENSOR_RESULT_TYPE(T,R)> result; // Must be a copy 448 // const Function<T,NDIM>& f; 449 // const std::vector< Function<R,NDIM> >& g; 450 // long jtop; 451 // 452 // MatrixInnerTask(const Tensor<TENSOR_RESULT_TYPE(T,R)>& result, 453 // const Function<T,NDIM>& f, 454 // const std::vector< Function<R,NDIM> >& g, 455 // long jtop) 456 // : result(result), f(f), g(g), jtop(jtop) {} 457 // 458 // void run(World& world) { 459 // for (long j=0; j<jtop; ++j) { 460 // result(j) = f.inner_local(g[j]); 461 // } 462 // } 463 // 464 // private: 465 // /// Get the task id 466 // 467 // /// \param id The id to set for this task 468 // virtual void get_id(std::pair<void*,unsigned short>& id) const { 469 // PoolTaskInterface::make_id(id, *this); 470 // } 471 // }; // struct MatrixInnerTask 472 473 474 475 template <typename T, std::size_t NDIM> 476 DistributedMatrix<T> matrix_inner(const DistributedMatrixDistribution& d, 477 const std::vector< Function<T,NDIM> >& f, 478 const std::vector< Function<T,NDIM> >& g, 479 bool sym=false) 480 { 481 PROFILE_FUNC; 482 DistributedMatrix<T> A(d); 483 const int64_t n = A.coldim(); 484 const int64_t m = A.rowdim(); 485 MADNESS_ASSERT(int64_t(f.size()) == n && int64_t(g.size()) == m); 486 487 // Assume we can always create an ichunk*jchunk matrix locally 488 const int ichunk = 1000; 489 const int jchunk = 1000; // 1000*1000*8 = 8 MBytes 490 for (int64_t ilo=0; ilo<n; ilo+=ichunk) { 491 int64_t ihi = std::min(ilo + ichunk, n); 492 std::vector< Function<T,NDIM> > ivec(f.begin()+ilo, f.begin()+ihi); 493 for (int64_t jlo=0; jlo<m; jlo+=jchunk) { 494 int64_t jhi = std::min(jlo + jchunk, m); 495 std::vector< Function<T,NDIM> > jvec(g.begin()+jlo, g.begin()+jhi); 496 497 Tensor<T> P = matrix_inner(A.get_world(),ivec,jvec); 498 A.copy_from_replicated_patch(ilo, ihi-1, jlo, jhi-1, P); 499 } 500 } 501 return A; 502 } 503 504 /// Computes the matrix inner product of two function vectors - q(i,j) = inner(f[i],g[j]) 505 506 /// For complex types symmetric is interpreted as Hermitian. 507 /// 508 /// The current parallel loop is non-optimal but functional. 509 template <typename T, typename R, std::size_t NDIM> 510 Tensor< TENSOR_RESULT_TYPE(T,R) > matrix_inner(World& world, 511 const std::vector< Function<T,NDIM> >& f, 512 const std::vector< Function<R,NDIM> >& g, 513 bool sym=false) 514 { 515 world.gop.fence(); 516 compress(world, f); 517 if ((void*)(&f) != (void*)(&g)) compress(world, g); 518 519 std::vector<const FunctionImpl<T,NDIM>*> left(f.size()); 520 std::vector<const FunctionImpl<R,NDIM>*> right(g.size()); 521 for (unsigned int i=0; i<f.size(); i++) left[i] = f[i].get_impl().get(); 522 for (unsigned int i=0; i<g.size(); i++) right[i]= g[i].get_impl().get(); 523 524 Tensor< TENSOR_RESULT_TYPE(T,R) > r= FunctionImpl<T,NDIM>::inner_local(left, right, sym); 525 526 world.gop.fence(); 527 world.gop.sum(r.ptr(),f.size()*g.size()); 528 529 return r; 530 } 531 532 /// Computes the matrix inner product of two function vectors - q(i,j) = inner(f[i],g[j]) 533 534 /// For complex types symmetric is interpreted as Hermitian. 535 /// 536 /// The current parallel loop is non-optimal but functional. 537 template <typename T, typename R, std::size_t NDIM> 538 Tensor< TENSOR_RESULT_TYPE(T,R) > matrix_inner_old(World& world, 539 const std::vector< Function<T,NDIM> >& f, 540 const std::vector< Function<R,NDIM> >& g, 541 bool sym=false) { 542 PROFILE_BLOCK(Vmatrix_inner); 543 long n=f.size(), m=g.size(); 544 Tensor< TENSOR_RESULT_TYPE(T,R) > r(n,m); 545 if (sym) MADNESS_ASSERT(n==m); 546 547 world.gop.fence(); 548 compress(world, f); 549 if ((void*)(&f) != (void*)(&g)) compress(world, g); 550 551 for (long i=0; i<n; ++i) { 552 long jtop = m; 553 if (sym) jtop = i+1; 554 for (long j=0; j<jtop; ++j) { 555 r(i,j) = f[i].inner_local(g[j]); 556 if (sym) r(j,i) = conj(r(i,j)); 557 } 558 } 559 560 // for (long i=n-1; i>=0; --i) { 561 // long jtop = m; 562 // if (sym) jtop = i+1; 563 // world.taskq.add(new MatrixInnerTask<T,R,NDIM>(r(i,_), f[i], g, jtop)); 564 // } 565 world.gop.fence(); 566 world.gop.sum(r.ptr(),n*m); 567 568 // if (sym) { 569 // for (int i=0; i<n; ++i) { 570 // for (int j=0; j<i; ++j) { 571 // r(j,i) = conj(r(i,j)); 572 // } 573 // } 574 // } 575 return r; 576 } 577 578 /// Computes the element-wise inner product of two function vectors - q(i) = inner(f[i],g[i]) 579 template <typename T, typename R, std::size_t NDIM> inner(World & world,const std::vector<Function<T,NDIM>> & f,const std::vector<Function<R,NDIM>> & g)580 Tensor< TENSOR_RESULT_TYPE(T,R) > inner(World& world, 581 const std::vector< Function<T,NDIM> >& f, 582 const std::vector< Function<R,NDIM> >& g) { 583 PROFILE_BLOCK(Vinnervv); 584 long n=f.size(), m=g.size(); 585 MADNESS_ASSERT(n==m); 586 Tensor< TENSOR_RESULT_TYPE(T,R) > r(n); 587 588 compress(world, f); 589 compress(world, g); 590 591 for (long i=0; i<n; ++i) { 592 r(i) = f[i].inner_local(g[i]); 593 } 594 595 world.taskq.fence(); 596 world.gop.sum(r.ptr(),n); 597 world.gop.fence(); 598 return r; 599 } 600 601 602 /// Computes the inner product of a function with a function vector - q(i) = inner(f,g[i]) 603 template <typename T, typename R, std::size_t NDIM> inner(World & world,const Function<T,NDIM> & f,const std::vector<Function<R,NDIM>> & g)604 Tensor< TENSOR_RESULT_TYPE(T,R) > inner(World& world, 605 const Function<T,NDIM>& f, 606 const std::vector< Function<R,NDIM> >& g) { 607 PROFILE_BLOCK(Vinner); 608 long n=g.size(); 609 Tensor< TENSOR_RESULT_TYPE(T,R) > r(n); 610 611 f.compress(); 612 compress(world, g); 613 614 for (long i=0; i<n; ++i) { 615 r(i) = f.inner_local(g[i]); 616 } 617 618 world.taskq.fence(); 619 world.gop.sum(r.ptr(),n); 620 world.gop.fence(); 621 return r; 622 } 623 624 /// inner function with right signature for the nonlinear sovler 625 /// this is needed for the KAIN solvers and other functions 626 template <typename T, typename R, std::size_t NDIM> inner(const std::vector<Function<T,NDIM>> & f,const std::vector<Function<R,NDIM>> & g)627 double inner( const std::vector< Function<T,NDIM> >& f, 628 const std::vector< Function<R,NDIM> >& g){ 629 MADNESS_ASSERT(f.size()==g.size()); 630 if(f.empty()) return 0.0; 631 else return inner(f[0].world(),f,g).sum(); 632 } 633 634 635 /// Multiplies a function against a vector of functions --- q[i] = a * v[i] 636 template <typename T, typename R, std::size_t NDIM> 637 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 638 mul(World& world, 639 const Function<T,NDIM>& a, 640 const std::vector< Function<R,NDIM> >& v, 641 bool fence=true) { 642 PROFILE_BLOCK(Vmul); 643 a.reconstruct(false); 644 reconstruct(world, v, false); 645 world.gop.fence(); 646 return vmulXX(a, v, 0.0, fence); 647 } 648 649 /// Multiplies a function against a vector of functions using sparsity of a and v[i] --- q[i] = a * v[i] 650 template <typename T, typename R, std::size_t NDIM> 651 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 652 mul_sparse(World& world, 653 const Function<T,NDIM>& a, 654 const std::vector< Function<R,NDIM> >& v, 655 double tol, 656 bool fence=true) { 657 PROFILE_BLOCK(Vmulsp); 658 a.reconstruct(false); 659 reconstruct(world, v, false); 660 world.gop.fence(); 661 for (unsigned int i=0; i<v.size(); ++i) { 662 v[i].norm_tree(false); 663 } 664 a.norm_tree(); 665 return vmulXX(a, v, tol, fence); 666 } 667 668 /// Makes the norm tree for all functions in a vector 669 template <typename T, std::size_t NDIM> 670 void norm_tree(World& world, 671 const std::vector< Function<T,NDIM> >& v, 672 bool fence=true) 673 { 674 PROFILE_BLOCK(Vnorm_tree); 675 for (unsigned int i=0; i<v.size(); ++i) { 676 v[i].norm_tree(false); 677 } 678 if (fence) world.gop.fence(); 679 } 680 681 /// Multiplies two vectors of functions q[i] = a[i] * b[i] 682 template <typename T, typename R, std::size_t NDIM> 683 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 684 mul(World& world, 685 const std::vector< Function<T,NDIM> >& a, 686 const std::vector< Function<R,NDIM> >& b, 687 bool fence=true) { 688 PROFILE_BLOCK(Vmulvv); 689 reconstruct(world, a, true); 690 if (&a != &b) reconstruct(world, b, true); 691 692 std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > q(a.size()); 693 for (unsigned int i=0; i<a.size(); ++i) { 694 q[i] = mul(a[i], b[i], false); 695 } 696 if (fence) world.gop.fence(); 697 return q; 698 } 699 700 701 /// Computes the square of a vector of functions --- q[i] = v[i]**2 702 template <typename T, std::size_t NDIM> 703 std::vector< Function<T,NDIM> > 704 square(World& world, 705 const std::vector< Function<T,NDIM> >& v, 706 bool fence=true) { 707 return mul<T,T,NDIM>(world, v, v, fence); 708 // std::vector< Function<T,NDIM> > vsq(v.size()); 709 // for (unsigned int i=0; i<v.size(); ++i) { 710 // vsq[i] = square(v[i], false); 711 // } 712 // if (fence) world.gop.fence(); 713 // return vsq; 714 } 715 716 717 /// Sets the threshold in a vector of functions 718 template <typename T, std::size_t NDIM> 719 void set_thresh(World& world, std::vector< Function<T,NDIM> >& v, double thresh, bool fence=true) { 720 for (unsigned int j=0; j<v.size(); ++j) { 721 v[j].set_thresh(thresh,false); 722 } 723 if (fence) world.gop.fence(); 724 } 725 726 /// Returns the complex conjugate of the vector of functions 727 template <typename T, std::size_t NDIM> 728 std::vector< Function<T,NDIM> > 729 conj(World& world, 730 const std::vector< Function<T,NDIM> >& v, 731 bool fence=true) { 732 PROFILE_BLOCK(Vconj); 733 std::vector< Function<T,NDIM> > r = copy(world, v); // Currently don't have oop conj 734 for (unsigned int i=0; i<v.size(); ++i) { 735 r[i].conj(false); 736 } 737 if (fence) world.gop.fence(); 738 return r; 739 } 740 741 742 /// Returns a deep copy of a vector of functions 743 template <typename T, std::size_t NDIM> 744 std::vector< Function<T,NDIM> > 745 copy(World& world, 746 const std::vector< Function<T,NDIM> >& v, 747 bool fence=true) { 748 PROFILE_BLOCK(Vcopy); 749 std::vector< Function<T,NDIM> > r(v.size()); 750 for (unsigned int i=0; i<v.size(); ++i) { 751 r[i] = copy(v[i], false); 752 } 753 if (fence) world.gop.fence(); 754 return r; 755 } 756 757 /// Returns a vector of deep copies of of a function 758 template <typename T, std::size_t NDIM> 759 std::vector< Function<T,NDIM> > 760 copy(World& world, 761 const Function<T,NDIM>& v, 762 const unsigned int n, 763 bool fence=true) { 764 PROFILE_BLOCK(Vcopy1); 765 std::vector< Function<T,NDIM> > r(n); 766 for (unsigned int i=0; i<n; ++i) { 767 r[i] = copy(v, false); 768 } 769 if (fence) world.gop.fence(); 770 return r; 771 } 772 773 /// Returns new vector of functions --- q[i] = a[i] + b[i] 774 template <typename T, typename R, std::size_t NDIM> 775 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 776 add(World& world, 777 const std::vector< Function<T,NDIM> >& a, 778 const std::vector< Function<R,NDIM> >& b, 779 bool fence=true) { 780 PROFILE_BLOCK(Vadd); 781 MADNESS_ASSERT(a.size() == b.size()); 782 compress(world, a); 783 compress(world, b); 784 785 std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > r(a.size()); 786 for (unsigned int i=0; i<a.size(); ++i) { 787 r[i] = add(a[i], b[i], false); 788 } 789 if (fence) world.gop.fence(); 790 return r; 791 } 792 793 /// Returns new vector of functions --- q[i] = a + b[i] 794 template <typename T, typename R, std::size_t NDIM> 795 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 796 add(World& world, 797 const Function<T,NDIM> & a, 798 const std::vector< Function<R,NDIM> >& b, 799 bool fence=true) { 800 PROFILE_BLOCK(Vadd1); 801 a.compress(); 802 compress(world, b); 803 804 std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > r(b.size()); 805 for (unsigned int i=0; i<b.size(); ++i) { 806 r[i] = add(a, b[i], false); 807 } 808 if (fence) world.gop.fence(); 809 return r; 810 } 811 template <typename T, typename R, std::size_t NDIM> 812 inline std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 813 add(World& world, 814 const std::vector< Function<R,NDIM> >& b, 815 const Function<T,NDIM> & a, 816 bool fence=true) { 817 return add(world, a, b, fence); 818 } 819 820 /// Returns new vector of functions --- q[i] = a[i] - b[i] 821 template <typename T, typename R, std::size_t NDIM> 822 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > 823 sub(World& world, 824 const std::vector< Function<T,NDIM> >& a, 825 const std::vector< Function<R,NDIM> >& b, 826 bool fence=true) { 827 PROFILE_BLOCK(Vsub); 828 MADNESS_ASSERT(a.size() == b.size()); 829 compress(world, a); 830 compress(world, b); 831 832 std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > r(a.size()); 833 for (unsigned int i=0; i<a.size(); ++i) { 834 r[i] = sub(a[i], b[i], false); 835 } 836 if (fence) world.gop.fence(); 837 return r; 838 } 839 840 /// Returns new function --- q = sum_i f[i] 841 template <typename T, std::size_t NDIM> 842 Function<T, NDIM> sum(World& world, const std::vector<Function<T,NDIM> >& f, 843 bool fence=true) { 844 845 compress(world, f); 846 Function<T,NDIM> r=real_factory_3d(world).compressed(); 847 848 for (unsigned int i=0; i<f.size(); ++i) r.gaxpy(1.0,f[i],1.0,false); 849 if (fence) world.gop.fence(); 850 return r; 851 } 852 853 854 /// Multiplies and sums two vectors of functions r = \sum_i a[i] * b[i] 855 template <typename T, typename R, std::size_t NDIM> 856 Function<TENSOR_RESULT_TYPE(T,R), NDIM> 857 dot(World& world, 858 const std::vector< Function<T,NDIM> >& a, 859 const std::vector< Function<R,NDIM> >& b, 860 bool fence=true) { 861 return sum(world,mul(world,a,b,true),fence); 862 } 863 864 865 866 /// Generalized A*X+Y for vectors of functions ---- a[i] = alpha*a[i] + beta*b[i] 867 template <typename T, typename Q, typename R, std::size_t NDIM> 868 void gaxpy(World& world, 869 Q alpha, 870 std::vector< Function<T,NDIM> >& a, 871 Q beta, 872 const std::vector< Function<R,NDIM> >& b, 873 bool fence=true) { 874 PROFILE_BLOCK(Vgaxpy); 875 MADNESS_ASSERT(a.size() == b.size()); 876 compress(world, a); 877 compress(world, b); 878 879 for (unsigned int i=0; i<a.size(); ++i) { 880 a[i].gaxpy(alpha, b[i], beta, false); 881 } 882 if (fence) world.gop.fence(); 883 } 884 885 886 /// Applies a vector of operators to a vector of functions --- q[i] = apply(op[i],f[i]) 887 template <typename opT, typename R, std::size_t NDIM> 888 std::vector< Function<TENSOR_RESULT_TYPE(typename opT::opT,R), NDIM> > apply(World & world,const std::vector<std::shared_ptr<opT>> & op,const std::vector<Function<R,NDIM>> f)889 apply(World& world, 890 const std::vector< std::shared_ptr<opT> >& op, 891 const std::vector< Function<R,NDIM> > f) { 892 893 PROFILE_BLOCK(Vapplyv); 894 MADNESS_ASSERT(f.size()==op.size()); 895 896 std::vector< Function<R,NDIM> >& ncf = *const_cast< std::vector< Function<R,NDIM> >* >(&f); 897 898 reconstruct(world, f); 899 nonstandard(world, ncf); 900 901 std::vector< Function<TENSOR_RESULT_TYPE(typename opT::opT,R), NDIM> > result(f.size()); 902 for (unsigned int i=0; i<f.size(); ++i) { 903 MADNESS_ASSERT(not op[i]->is_slaterf12); 904 result[i] = apply_only(*op[i], f[i], false); 905 } 906 907 world.gop.fence(); 908 909 standard(world, ncf, false); // restores promise of logical constness 910 world.gop.fence(); 911 reconstruct(world, result); 912 913 return result; 914 } 915 916 917 /// Applies an operator to a vector of functions --- q[i] = apply(op,f[i]) 918 template <typename T, typename R, std::size_t NDIM> 919 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > apply(World & world,const SeparatedConvolution<T,NDIM> & op,const std::vector<Function<R,NDIM>> f)920 apply(World& world, 921 const SeparatedConvolution<T,NDIM>& op, 922 const std::vector< Function<R,NDIM> > f) { 923 PROFILE_BLOCK(Vapply); 924 925 std::vector< Function<R,NDIM> >& ncf = *const_cast< std::vector< Function<R,NDIM> >* >(&f); 926 927 reconstruct(world, f); 928 nonstandard(world, ncf); 929 930 std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > result(f.size()); 931 for (unsigned int i=0; i<f.size(); ++i) { 932 result[i] = apply_only(op, f[i], false); 933 } 934 935 world.gop.fence(); 936 937 standard(world, ncf, false); // restores promise of logical constness 938 reconstruct(world, result); 939 940 if (op.is_slaterf12) { 941 MADNESS_ASSERT(not op.destructive()); 942 for (unsigned int i=0; i<f.size(); ++i) { 943 double trace=f[i].trace(); 944 result[i]=(result[i]-trace).scale(-0.5/op.mu()); 945 } 946 } 947 948 return result; 949 } 950 951 /// Normalizes a vector of functions --- v[i] = v[i].scale(1.0/v[i].norm2()) 952 template <typename T, std::size_t NDIM> 953 void normalize(World& world, std::vector< Function<T,NDIM> >& v, bool fence=true) { 954 PROFILE_BLOCK(Vnormalize); 955 std::vector<double> nn = norm2s(world, v); 956 for (unsigned int i=0; i<v.size(); ++i) v[i].scale(1.0/nn[i],false); 957 if (fence) world.gop.fence(); 958 } 959 960 template <typename T, std::size_t NDIM> 961 void print_size(World &world, const std::vector<Function<T,NDIM> > &v, const std::string &msg = "vectorfunction" ){ 962 if(v.empty()){ 963 if(world.rank()==0) std::cout << "print_size: " << msg << " is empty" << std::endl; 964 }else if(v.size()==1){ 965 v.front().print_size(msg); 966 }else{ 967 for(auto x:v){ 968 x.print_size(msg); 969 } 970 } 971 } 972 973 // gives back the size in GB 974 template <typename T, std::size_t NDIM> get_size(World & world,const std::vector<Function<T,NDIM>> & v)975 double get_size(World& world, const std::vector< Function<T,NDIM> >& v){ 976 977 if (v.empty()) return 0.0; 978 979 const double d=sizeof(T); 980 const double fac=1024*1024*1024; 981 982 double size=0.0; 983 for(unsigned int i=0;i<v.size();i++){ 984 if (v[i].is_initialized()) size+=v[i].size(); 985 } 986 987 return size/fac*d; 988 989 } 990 991 // gives back the size in GB 992 template <typename T, std::size_t NDIM> get_size(const Function<T,NDIM> & f)993 double get_size(const Function<T,NDIM> & f){ 994 const double d=sizeof(T); 995 const double fac=1024*1024*1024; 996 double size=f.size(); 997 return size/fac*d; 998 } 999 1000 /// apply op on the input vector yielding an output vector of functions 1001 1002 /// @param[in] op the operator working on vin 1003 /// @param[in] vin vector of input Functions; needs to be refined to common level! 1004 /// @return vector of output Functions vout = op(vin) 1005 template <typename T, typename opT, std::size_t NDIM> 1006 std::vector<Function<T,NDIM> > multi_to_multi_op_values(const opT& op, 1007 const std::vector< Function<T,NDIM> >& vin, 1008 const bool fence=true) { 1009 MADNESS_ASSERT(vin.size()>0); 1010 MADNESS_ASSERT(vin[0].is_initialized()); // might be changed 1011 World& world=vin[0].world(); 1012 Function<T,NDIM> dummy; 1013 dummy.set_impl(vin[0], false); 1014 std::vector<Function<T,NDIM> > vout=zero_functions<T,NDIM>(world, op.get_result_size()); 1015 for (auto& out : vout) out.set_impl(vin[0],false); 1016 dummy.multi_to_multi_op_values(op, vin, vout, fence); 1017 return vout; 1018 } 1019 1020 1021 1022 1023 // convenience operators 1024 1025 template <typename T, std::size_t NDIM> 1026 std::vector<Function<T,NDIM> > operator+(const std::vector<Function<T,NDIM> >& lhs, 1027 const std::vector<Function<T,NDIM>>& rhs) { 1028 return add(lhs[0].world(),lhs,rhs); 1029 } 1030 1031 template <typename T, std::size_t NDIM> 1032 std::vector<Function<T,NDIM> > operator-(const std::vector<Function<T,NDIM> >& lhs, 1033 const std::vector<Function<T,NDIM> >& rhs) { 1034 return sub(lhs[0].world(),lhs,rhs); 1035 } 1036 1037 template <typename T, std::size_t NDIM> 1038 std::vector<Function<T,NDIM> > operator*(const double fac, 1039 const std::vector<Function<T,NDIM> >& rhs) { 1040 std::vector<Function<T,NDIM> > tmp=copy(rhs[0].world(),rhs); 1041 scale(tmp[0].world(),tmp,fac); 1042 return tmp; 1043 } 1044 1045 template <typename T, std::size_t NDIM> 1046 std::vector<Function<T,NDIM> > operator*(const std::vector<Function<T,NDIM> >& rhs, 1047 const double fac) { 1048 std::vector<Function<T,NDIM> > tmp=copy(rhs[0].world(),rhs); 1049 scale(tmp[0].world(),tmp,fac); 1050 return tmp; 1051 } 1052 1053 /// multiply a vector of functions with a function: r[i] = v[i] * a 1054 template <typename T, std::size_t NDIM> 1055 std::vector<Function<T,NDIM> > operator*(const Function<T,NDIM>& a, 1056 const std::vector<Function<T,NDIM> >& v) { 1057 return mul(v[0].world(),a,v,true); 1058 } 1059 1060 1061 /// multiply a vector of functions with a function: r[i] = a * v[i] 1062 template <typename T, std::size_t NDIM> 1063 std::vector<Function<T,NDIM> > operator*(const std::vector<Function<T,NDIM> >& v, 1064 const Function<T,NDIM>& a) { 1065 return mul(v[0].world(),a,v,true); 1066 } 1067 1068 1069 template <typename T, std::size_t NDIM> 1070 std::vector<Function<T,NDIM> > operator+=(std::vector<Function<T,NDIM> >& rhs, 1071 const std::vector<Function<T,NDIM> >& lhs) { 1072 rhs=add(rhs[0].world(),rhs,lhs); 1073 return rhs; 1074 } 1075 1076 template <typename T, std::size_t NDIM> 1077 std::vector<Function<T,NDIM> > operator-=(std::vector<Function<T,NDIM> >& rhs, 1078 const std::vector<Function<T,NDIM> >& lhs) { 1079 rhs=sub(rhs[0].world(),rhs,lhs); 1080 return rhs; 1081 } 1082 1083 /// shorthand gradient operator 1084 1085 /// returns the differentiated function f in all NDIM directions 1086 /// @param[in] f the function on which the grad operator works on 1087 /// @param[in] refine refinement before diff'ing makes the result more accurate 1088 /// @param[in] fence fence after completion; if reconstruction is needed always fence 1089 /// @return the vector \frac{\partial}{\partial x_i} f 1090 template <typename T, std::size_t NDIM> 1091 std::vector<Function<T,NDIM> > grad(const Function<T,NDIM>& f, 1092 bool refine=false, bool fence=true) { 1093 1094 World& world=f.world(); 1095 f.reconstruct(); 1096 if (refine) f.refine(); // refine to make result more precise 1097 1098 std::vector< std::shared_ptr< Derivative<T,NDIM> > > grad= 1099 gradient_operator<T,NDIM>(world); 1100 1101 std::vector<Function<T,NDIM> > result(NDIM); 1102 for (int i=0; i<NDIM; ++i) result[i]=apply(*(grad[i]),f,false); 1103 if (fence) world.gop.fence(); 1104 return result; 1105 } 1106 1107 /// shorthand div operator 1108 1109 /// returns the dot product of nabla with a vector f 1110 /// @param[in] f the vector of functions on which the div operator works on 1111 /// @param[in] refine refinement before diff'ing makes the result more accurate 1112 /// @param[in] fence fence after completion; currently always fences 1113 /// @return the vector \frac{\partial}{\partial x_i} f 1114 /// TODO: add this to operator fusion 1115 template <typename T, std::size_t NDIM> 1116 Function<T,NDIM> div(const std::vector<Function<T,NDIM> >& v, 1117 bool do_refine=false, bool fence=true) { 1118 1119 MADNESS_ASSERT(v.size()>0); 1120 World& world=v[0].world(); 1121 reconstruct(world,v); 1122 if (do_refine) refine(world,v); // refine to make result more precise 1123 1124 std::vector< std::shared_ptr< Derivative<T,NDIM> > > grad= 1125 gradient_operator<T,NDIM>(world); 1126 1127 std::vector<Function<T,NDIM> > result(NDIM); 1128 for (int i=0; i<NDIM; ++i) result[i]=apply(*(grad[i]),v[i],false); 1129 world.gop.fence(); 1130 return sum(world,result,fence); 1131 } 1132 1133 /// load a vector of functions 1134 template<typename T, size_t NDIM> load_function(World & world,std::vector<Function<T,NDIM>> & f,const std::string name)1135 void load_function(World& world, std::vector<Function<T,NDIM> >& f, 1136 const std::string name) { 1137 if (world.rank()==0) print("loading vector of functions",name); 1138 archive::ParallelInputArchive ar(world, name.c_str(), 1); 1139 std::size_t fsize=0; 1140 ar & fsize; 1141 f.resize(fsize); 1142 for (std::size_t i=0; i<fsize; ++i) ar & f[i]; 1143 } 1144 1145 /// save a vector of functions 1146 template<typename T, size_t NDIM> save_function(const std::vector<Function<T,NDIM>> & f,const std::string name)1147 void save_function(const std::vector<Function<T,NDIM> >& f, const std::string name) { 1148 if (f.size()>0) { 1149 World& world=f.front().world(); 1150 if (world.rank()==0) print("saving vector of functions",name); 1151 archive::ParallelOutputArchive ar(world, name.c_str(), 1); 1152 std::size_t fsize=f.size(); 1153 ar & fsize; 1154 for (std::size_t i=0; i<fsize; ++i) ar & f[i]; 1155 } 1156 } 1157 1158 1159 } 1160 #endif // MADNESS_MRA_VMRA_H__INCLUDED 1161