1 #ifndef QMCPLUSPLUS_AFQMC_SLATERDETOPERATIONS_H 2 #define QMCPLUSPLUS_AFQMC_SLATERDETOPERATIONS_H 3 4 #include <fstream> 5 6 #include "AFQMC/config.h" 7 #include "Message/MPIObjectBase.h" 8 #include "Numerics/DeterminantOperators.h" 9 #include "Numerics/MatrixOperators.h" 10 #include "Message/Communicate.h" 11 //#include "Message/CommOperators.h" 12 13 #include "AFQMC/Hamiltonians/HamiltonianBase.h" 14 #include "AFQMC/Walkers/SlaterDetWalker.h" 15 #include "AFQMC/Numerics/DenseMatrixOperations.h" 16 #include "AFQMC/Numerics/SparseMatrixOperations.h" 17 18 namespace qmcplusplus 19 { 20 //class SlaterDetOperations: public EstimatorBase 21 class SlaterDetOperations : public MPIObjectBase, public AFQMCInfo 22 { 23 public: 24 typedef HamiltonianBase* HamPtr; 25 SlaterDetOperations(Communicate * c)26 SlaterDetOperations(Communicate* c) : MPIObjectBase(c), ham(NULL) {} 27 setup(HamPtr h,myTimer * timer_)28 void setup(HamPtr h, myTimer* timer_) 29 { 30 ham = h; 31 timer = timer_; 32 GF.resize(2 * NMO, NMO); 33 tGF.resize(2 * NMO, NMO); 34 V0.resize(2 * NMO * NMO); 35 Cwork.resize(2 * NMO); 36 pivot.resize(2 * NMO); 37 } 38 39 void green_function(ComplexType* A, ComplexType* B, ComplexType& ovlp, SPComplexMatrix& G, bool getG = true) 40 { 41 const ComplexType one = ComplexType(1.0); 42 const ComplexType zero = ComplexType(0.0); 43 44 // G = transpose( B * ( transpose(conjg(A)) * B )^-1 * transpose(conjg(A)) ) 45 ComplexMatrix S0(NAEA, NAEA); 46 ComplexMatrix S1(NAEB, NAEB); 47 ComplexMatrix SS0(2 * NMO, NAEA); 48 49 50 if (getG) 51 tGF = ComplexType(0.0); 52 S0 = ComplexType(0.0); 53 S1 = ComplexType(0.0); 54 55 // S0 = transpose(conjg(A))*B 56 DenseMatrixOperators::product_AhB(NAEA, NAEA, NMO, one, A, NAEA, B, NAEA, zero, S0.data(), NAEA); 57 58 // S0 = S0^-1 59 ovlp = Invert(S0.data(), NAEA, NAEA, Cwork.data(), pivot.data()); 60 61 // SS0 = B * S0 62 if (getG) 63 DenseMatrixOperators::product(NMO, NAEA, NAEA, one, B, NAEA, S0.data(), NAEA, zero, SS0.data(), NAEA); 64 // G(beta) = SS0*transpose(conjg(A)) 65 if (getG) 66 DenseMatrixOperators::product_ABh(NMO, NMO, NAEA, one, SS0.data(), NAEA, A, NAEA, zero, tGF.data(), NMO); 67 68 // S1 = transpose(conjg(A))*B 69 DenseMatrixOperators::product_AhB(NAEB, NAEB, NMO, one, A + NMO * NAEA, NAEA, B + NAEA * NMO, NAEA, zero, S1.data(), 70 NAEB); 71 72 // S0 = S0^-1 73 ovlp *= Invert(S1.data(), NAEB, NAEB, Cwork.data(), pivot.data()); 74 75 if (!getG) 76 return; 77 78 if (std::abs(ovlp) < 1e-6) 79 { 80 G = SPComplexType(0.0); 81 return; 82 } 83 84 // SS0(beta) = B(beta) * S1 85 DenseMatrixOperators::product(NMO, NAEB, NAEB, one, B + NAEA * NMO, NAEA, S1.data(), NAEB, zero, 86 SS0.data() + NAEA * NMO, NAEA); 87 88 // G(beta) = SS0*transpose(conjg(A)) 89 DenseMatrixOperators::product_ABh(NMO, NMO, NAEB, one, SS0.data() + NAEA * NMO, NAEA, A + NAEA * NMO, NAEA, zero, 90 tGF.data() + NMO * NMO, NMO); 91 92 for (int i = 0; i < NMO; i++) 93 for (int j = 0; j < i; j++) 94 { 95 std::swap(tGF(i, j), tGF(j, i)); 96 std::swap(tGF(i + NMO, j), tGF(j + NMO, i)); 97 } 98 std::copy(tGF.begin(), tGF.end(), G.begin()); 99 } 100 101 void green_function(ComplexMatrix& A, ComplexMatrix& B, ComplexType& ovlp, ComplexMatrix& G, bool getG = true) 102 { 103 const ComplexType one = ComplexType(1.0); 104 const ComplexType zero = ComplexType(0.0); 105 106 // G = transpose( B * ( transpose(conjg(A)) * B )^-1 * transpose(conjg(A)) ) 107 ComplexMatrix S0(NAEA, NAEA); 108 ComplexMatrix S1(NAEB, NAEB); 109 ComplexMatrix SS0(2 * NMO, NAEA); 110 111 if (getG) 112 G = ComplexType(0.0); 113 S0 = ComplexType(0.0); 114 S1 = ComplexType(0.0); 115 116 // S0 = transpose(conjg(A))*B 117 DenseMatrixOperators::product_AhB(NAEA, NAEA, NMO, one, A.data(), NAEA, B.data(), NAEA, zero, S0.data(), NAEA); 118 119 // S0 = S0^-1 120 ovlp = Invert(S0.data(), NAEA, NAEA, Cwork.data(), pivot.data()); 121 122 // SS0 = B * S0 123 if (getG) 124 DenseMatrixOperators::product(NMO, NAEA, NAEA, one, B.data(), NAEA, S0.data(), NAEA, zero, SS0.data(), NAEA); 125 // G(beta) = SS0*transpose(conjg(A)) 126 if (getG) 127 DenseMatrixOperators::product_ABh(NMO, NMO, NAEA, one, SS0.data(), NAEA, A.data(), NAEA, zero, G.data(), NMO); 128 129 // S1 = transpose(conjg(A))*B 130 DenseMatrixOperators::product_AhB(NAEB, NAEB, NMO, one, A.data() + NMO * NAEA, NAEA, B.data() + NAEA * NMO, NAEA, 131 zero, S1.data(), NAEB); 132 133 // S0 = S0^-1 134 ovlp *= Invert(S1.data(), NAEB, NAEB, Cwork.data(), pivot.data()); 135 136 if (!getG) 137 return; 138 139 if (std::abs(ovlp) < 1e-6) 140 { 141 G = ComplexType(0.0); 142 return; 143 } 144 145 // SS0(beta) = B(beta) * S1 146 DenseMatrixOperators::product(NMO, NAEB, NAEB, one, B.data() + NAEA * NMO, NAEA, S1.data(), NAEB, zero, 147 SS0.data() + NAEA * NMO, NAEA); 148 149 // G(beta) = SS0*transpose(conjg(A)) 150 DenseMatrixOperators::product_ABh(NMO, NMO, NAEB, one, SS0.data() + NAEA * NMO, NAEA, A.data() + NAEA * NMO, NAEA, 151 zero, G.data() + NMO * NMO, NMO); 152 153 for (int i = 0; i < NMO; i++) 154 for (int j = 0; j < i; j++) 155 { 156 std::swap(G(i, j), G(j, i)); 157 std::swap(G(i + NMO, j), G(j + NMO, i)); 158 } 159 } 160 matrix_element_and_overlap(ComplexType * A,ComplexType * B,ComplexType & ovlp,ComplexType & hamME)161 void matrix_element_and_overlap(ComplexType* A, ComplexType* B, ComplexType& ovlp, ComplexType& hamME) 162 { 163 green_function(A, B, ovlp, GF); 164 165 if (std::abs(ovlp) < 1e-6) 166 { 167 ovlp = ComplexType(0.0); 168 hamME = ComplexType(0.0); 169 return; 170 } 171 172 SPValueSMSpMat* V; 173 std::vector<s1D<ValueType>>* h; 174 int nr1 = 1, nc1 = 2 * NMO * NMO; 175 SPValueType one = SPValueType(1.0); 176 SPValueType zero = SPValueType(0.0); 177 178 ham->getFullHam(h, V); 179 180 hamME = ComplexType(0.0); 181 182 SparseMatrixOperators::product_SpMatV(nc1, nc1, one, V->values(), V->column_data(), V->row_index(), GF.data(), zero, 183 V0.data()); 184 185 SPComplexMatrix::iterator itG = GF.begin(); 186 SPComplexVector::iterator itV = V0.begin(); 187 for (int i = 0; i < nc1; i++, ++itG, ++itV) 188 hamME += static_cast<ComplexType>(*itV) * static_cast<ComplexType>(*itG); 189 hamME = 0.5 * hamME + ham->NuclearCoulombEnergy; 190 191 std::vector<s1D<ValueType>>::iterator end1 = h->end(); 192 itG = GF.begin(); 193 for (std::vector<s1D<ValueType>>::iterator it = h->begin(); it != end1; it++) 194 hamME += static_cast<ComplexType>(*(itG + std::get<0>(*it))) * std::get<1>(*it); 195 196 hamME *= ovlp; 197 } 198 199 void diag(std::vector<SlaterDetWalker>::iterator itbegin, 200 std::vector<SlaterDetWalker>::iterator itend, 201 int nstates, 202 std::vector<RealType>& eigVal, 203 ComplexMatrix& eigVec, 204 ComplexType& exactEnergy, 205 ComplexMatrix* HF, 206 bool getEigV = false) 207 { 208 if (myComm->size() > 1) 209 APP_ABORT(" ERROR: Estimators::SlaterDetOperations::diag(): Only implemented in serial. \n"); 210 211 int N = 0; 212 for (std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1 != itend; it1++) 213 if (it1->alive && std::abs(it1->weight) > 1e-3) 214 N++; 215 if (N == 0) 216 return; 217 if (HF != NULL) 218 N++; 219 nstates = std::min(nstates, N); 220 ComplexMatrix H(N), S(N); 221 exactEnergy = ComplexType(0.0); 222 ComplexType nume = ComplexType(0.0); 223 ComplexType deno = ComplexType(0.0); 224 #ifdef AFQMC_TIMER 225 timer->start("SlaterDetOperations::diag::evaluate_H_S"); 226 #endif 227 int i = 0; 228 if (HF != NULL) 229 { // always include HF state in list 230 int j = i; 231 matrix_element_and_overlap(HF->data(), HF->data(), S(i, j), H(i, j)); 232 j++; 233 for (std::vector<SlaterDetWalker>::iterator it2 = itbegin; it2 != itend; it2++) 234 { 235 if (!(it2->alive && std::abs(it2->weight) > 1e-3)) 236 continue; 237 matrix_element_and_overlap(HF->data(), (it2->SlaterMat).data(), S(i, j), H(i, j)); 238 if (i != j) 239 { 240 H(j, i) = ma::conj(H(i, j)); 241 S(j, i) = ma::conj(S(i, j)); 242 } 243 j++; 244 } 245 i++; 246 } 247 for (std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1 != itend; it1++) 248 { 249 if (!(it1->alive && std::abs(it1->weight) > 1e-3)) 250 continue; 251 int j = i; 252 for (std::vector<SlaterDetWalker>::iterator it2 = it1; it2 != itend; it2++) 253 { 254 if (!(it2->alive && std::abs(it2->weight) > 1e-3)) 255 continue; 256 matrix_element_and_overlap((it1->SlaterMat).data(), (it2->SlaterMat).data(), S(i, j), H(i, j)); 257 nume += it1->weight * it2->weight * H(i, j); 258 deno += it1->weight * it2->weight * S(i, j); 259 if (i != j) 260 { 261 H(j, i) = ma::conj(H(i, j)); 262 S(j, i) = ma::conj(S(i, j)); 263 nume += ma::conj(it1->weight) * it2->weight * H(j, i) / 264 (ma::conj(std::get<0>(it1->overlap_alpha) * std::get<0>(it1->overlap_beta)) * 265 std::get<0>(it2->overlap_alpha) * std::get<0>(it2->overlap_beta)); 266 deno += ma::conj(it1->weight) * it2->weight * S(j, i) / 267 (ma::conj(std::get<0>(it1->overlap_alpha) * std::get<0>(it1->overlap_beta)) * 268 std::get<0>(it2->overlap_alpha) * std::get<0>(it2->overlap_beta)); 269 } 270 j++; 271 } 272 i++; 273 } 274 exactEnergy = nume / deno; 275 #ifdef AFQMC_TIMER 276 timer->stop("SlaterDetOperations::diag::evaluate_H_S"); 277 #endif 278 #ifdef AFQMC_TIMER 279 timer->start("SlaterDetOperations::diag::solve_GEV"); 280 #endif 281 if (nstates > 0) 282 { 283 std::vector<int> ifail(N); 284 eigVal.resize(nstates); 285 getEigV = true; 286 if (getEigV) 287 eigVec.resize(nstates, N); 288 /* 289 std::ofstream out1("Hr.dat"); 290 std::ofstream out2("Hc.dat"); 291 std::ofstream out3("Sr.dat"); 292 std::ofstream out4("Sc.dat"); 293 for(int i=0; i<N; i++) { 294 for(int j=0; j<N; j++) out1<<H(i,j).real() <<" "; 295 for(int j=0; j<N; j++) out2<<H(i,j).imag() <<" "; 296 out1<<std::endl; 297 out2<<std::endl; 298 } 299 for(int i=0; i<N; i++) { 300 for(int j=0; j<N; j++) out3<<S(i,j).real() <<" "; 301 for(int j=0; j<N; j++) out4<<S(i,j).imag() <<" "; 302 out3<<std::endl; 303 out4<<std::endl; 304 } 305 out1.close(); 306 out2.close(); 307 out3.close(); 308 out4.close(); 309 APP_ABORT("Testing. \n"); 310 */ 311 bool success = 312 DenseMatrixOperators::genHermitianEigenSysSelect(N, H.data(), N, S.data(), N, nstates, eigVal.data(), getEigV, 313 eigVec.data(), eigVec.size2(), ifail.data()); 314 if (!success) 315 for (int i = 0; i < nstates; i++) 316 eigVal[i] = 0.0; 317 else 318 { 319 std::ofstream out("diag.dat", std::ios_base::app | std::ios_base::out); 320 std::vector<double> coeff(N); 321 for (int i = 0; i < N; i++) 322 coeff[i] = std::abs(eigVec(1, i)); 323 std::sort(coeff.begin(), coeff.end()); 324 for (int i = 0; i < N; i++) 325 out << coeff[i] << " "; 326 out << std::endl; 327 out.close(); 328 } 329 } 330 #ifdef AFQMC_TIMER 331 timer->stop("SlaterDetOperations::diag::solve_GEV"); 332 #endif 333 } 334 overlap(std::vector<SlaterDetWalker>::iterator itbegin,std::vector<SlaterDetWalker>::iterator itend)335 ComplexType overlap(std::vector<SlaterDetWalker>::iterator itbegin, std::vector<SlaterDetWalker>::iterator itend) 336 { 337 std::vector<ComplexType> ovlp(2, ComplexType(0.0)); 338 ComplexType sum_w = ComplexType(0.0); 339 ComplexMatrix A(2 * NMO, NAEA); 340 ComplexMatrix B(2 * NMO, NAEA); 341 ComplexMatrix G(1); 342 std::vector<char> buffer_in; 343 std::vector<char> buffer_out; 344 std::vector<int> to(1); 345 std::vector<int> from(myComm->size()); 346 int sz = itbegin->sizeForDump(); 347 int nWtot = 0, nW = 0, nWmax = 0; 348 for (std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1 != itend; it1++) 349 if (it1->alive) 350 nW++; 351 352 to[0] = nW; 353 myComm->allgather(to, from, 1); 354 for (int i = 0; i < myComm->size(); i++) 355 { 356 nWtot += from[i]; 357 if (from[i] > nWmax) 358 nWmax = from[i]; 359 } 360 361 buffer_out.resize(nW * sz); 362 363 int cnt = 0; 364 for (std::vector<SlaterDetWalker>::iterator it = itbegin; it != itend; it++) 365 if (it->alive) 366 { 367 it->dumpToChar(buffer_out.data() + cnt); 368 cnt += sz; 369 } 370 371 ovlp[0] = ovlp[1] = ComplexType(0.0); 372 ComplexType w1, w2, o1, o2, e1, e2, ov; 373 // diagonal contribution 374 for (int i = 0; i < nW; i++) 375 { 376 itbegin->unpackFromChar(buffer_out.data() + sz * i, A, w1, e1, o1); 377 ovlp[0] += w1; 378 for (int j = i; j < nW; j++) 379 { 380 itbegin->unpackFromChar(buffer_out.data() + j * sz, B, w2, e2, o2); 381 green_function(A, B, ov, G, false); 382 ovlp[1] += 383 ma::conj(w1) * w2 * ov / (ma::conj(o1) * o2) + ma::conj(w2) * w1 * ma::conj(ov) / (ma::conj(o2) * o1); 384 } 385 } 386 387 if (myComm->size() == 1) 388 return ovlp[0] / std::sqrt(std::abs(ovlp[1])); 389 buffer_in.resize(nWmax * sz); 390 int rec = (myComm->rank() + 1) % (myComm->size()); 391 int send = (myComm->rank() - 1) % (myComm->size()); 392 for (int i = 0; i < myComm->size() - 1; i++) 393 { 394 // myComm->isend(send, send*myComm->size()+myComm->rank() ,buffer_out); 395 // myComm->irecv(rec, myComm->rank()*myComm->size()+rec ,buffer_in); 396 397 // dump way to avoid double counting, but efficiency depends heavily on load balance 398 if (rec < myComm->rank()) 399 { 400 // I only do the top half 401 for (int i = 0; i < from[rec] / 2; i++) 402 { 403 itbegin->unpackFromChar(buffer_in.data() + sz * i, A, w1, e1, o1); 404 for (int j = 0; j < nW; j++) 405 { 406 itbegin->unpackFromChar(buffer_out.data() + j * sz, B, w2, e2, o2); 407 green_function(A, B, ov, G, false); 408 ovlp[1] += 409 ma::conj(w1) * w2 * ov / (ma::conj(o1) * o2) + ma::conj(w2) * w1 * ma::conj(ov) / (ma::conj(o2) * o1); 410 } 411 } 412 } 413 else 414 { 415 // I only do the bottom half 416 for (int i = nW / 2; i < nW; i++) 417 { 418 itbegin->unpackFromChar(buffer_out.data() + sz * i, A, w1, e1, o1); 419 for (int j = 0; j < from[rec]; j++) 420 { 421 itbegin->unpackFromChar(buffer_in.data() + j * sz, B, w2, e2, o2); 422 green_function(A, B, ov, G, false); 423 ovlp[1] += 424 ma::conj(w1) * w2 * ov / (ma::conj(o1) * o2) + ma::conj(w2) * w1 * ma::conj(ov) / (ma::conj(o2) * o1); 425 } 426 } 427 } 428 429 rec = (rec + 1) % (myComm->size()); 430 send = (send - 1) % (myComm->size()); 431 } 432 433 std::vector<ComplexType> res(2); 434 //myComm->gsum(ovlp); 435 return res[0] / std::sqrt(std::abs(res[1])); 436 } 437 438 private: 439 std::vector<ComplexType> Cwork; 440 std::vector<int> pivot; 441 442 HamPtr ham; 443 ComplexMatrix tGF; 444 SPComplexMatrix GF; 445 SPComplexVector V0; 446 myTimer* timer; 447 }; 448 449 } // namespace qmcplusplus 450 451 #endif 452