src/MicroScf/CtFockBuild.cpp

/* Copyright (c) 2015  Gerald Knizia
 *
 * This file is part of the IboView program (see: http://www.iboview.org)
 *
 * IboView is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * IboView is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for details.
 *
 * You should have received a copy of the GNU General Public License
 * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
 *
 * Please see IboView documentation in README.txt for:
 * -- A list of included external software and their licenses. The included
 *    external software's copyright is not touched by this agreement.
 * -- Notes on re-distribution and contributions to/further development of
 *    the IboView software
 */

// #include <iostream>
#include <cmath>
#include <boost/format.hpp>
using boost::format;
#include "Ir.h"
#include "CxDiis.h"
#include "CtRhf.h"
#include "CtIo.h"
#include "CtTiming.h"
#include "CxPodArray.h"
#if 0
   #include "RDTSC.h"
#else
   #define RESET_CLOCKS
   #define RESUME_CLOCK(x)
   #define PAUSE_CLOCK(x)
#endif

#include "CtConstants.h"
#include "CtDft.h"


#include "CtDfti.h"

namespace ct {

// Global threshold which the Fock builders in this file hand to the DFT
// integration driver via FDftiArgs, together with MakeLogThrOrb(g_fThrOrb).
// NOTE(review): presumably an orbital-value screening threshold on the
// integration grid -- confirm against the DFTI implementation.
double g_fThrOrb = 1e-12;
//    double g_fThrOrb = 1e-10;

// Converts the threshold ThrOrb into the logarithmic form -log(0.1 * ThrOrb).
// Within this file the result is passed to FDftiArgs directly after the
// threshold itself.
double MakeLogThrOrb(double ThrOrb) {
   // History: a fixed value of 40 was returned here before, which looked
   // excessive. Open question from the original author: base this on
   // ThrOrb * 1e-3 (or similar) instead?
   double const
      fTightenedThr = .1 * ThrOrb;
   return -std::log(fTightenedThr);
}

// Out-of-line destructor of the Fock component builder base class.
// There is nothing to release at this level.
FFockComponentBuilder::~FFockComponentBuilder()
{
}

// Shared sanity check for Fock builders; derived classes call this before doing
// their actual work. It accumulates nothing itself.
//
// Checks performed, with nBf := pBasis->nFn():
//   - FockC must be nBf x nBf.
//   - FockO must either have no data (pData == 0) or be nBf x nBf.
//   - COccC must have nBf rows.
//   - COccO must either have no data or have nBf rows.
// Throws std::runtime_error if any of these fails. Flags and Mem are unused.
void FFockComponentBuilder::AccFock(FMatrixView &FockC, FMatrixView &FockO, FBasisSet *pBasis, FMatrixView const &COccC, FMatrixView const &COccO, uint Flags, FMemoryStack &Mem)
{
   IR_SUPPRESS_UNUSED_WARNING(Flags);
   IR_SUPPRESS_UNUSED_WARNING(Mem);

   size_t const
      nBasisFn = pBasis->nFn();
   bool const
      ClosedFockOk = (FockC.nRows == nBasisFn) && (FockC.nCols == nBasisFn),
      OpenFockOk = (FockO.pData == 0) || (FockO.nRows == nBasisFn && FockO.nCols == nBasisFn),
      ClosedOrbOk = (COccC.nRows == nBasisFn),
      OpenOrbOk = (COccO.pData == 0) || (COccO.nRows == nBasisFn);

   if (ClosedFockOk && OpenFockOk && ClosedOrbOk && OpenOrbOk)
      return;
   throw std::runtime_error("FFockComponentBuilder: Input orbitals not consistent with orbital basis sets.");
}

// Fallback for builders which do not override this method: it prints no energy
// contributions and only writes a reminder to the log.
void FFockComponentBuilder::PrintEnergyContribs()
{
   m_Log.Write(
      "FFockComponentBuilder::PrintEnergyContribs: not implemented for current Fock builder. Fix this."
   );
}

// Fallback gradient hook for builders which do not override it. No gradient
// contribution is accumulated: Gradient is left untouched and a reminder is
// written to the log instead.
//
// All parameters are intentionally unused here. They are marked as such in the
// same way as in the other base-class stubs of this file.
void FFockComponentBuilder::AccGradient(FMatrixView Gradient, FMatrixView COccC, FMatrixView COccO, FMemoryStack &Mem)
{
   m_Log.Write("FFockComponentBuilder::AccGradient: not implemented for current Fock builder. Fix this.");
   IR_SUPPRESS_UNUSED_WARNING(Gradient);
   IR_SUPPRESS_UNUSED_WARNING(COccC);
   IR_SUPPRESS_UNUSED_WARNING(COccO);
   IR_SUPPRESS_UNUSED_WARNING(Mem);
}

// Base-class version of the grid switch: does nothing with NewGridParams and
// always returns false.
bool FFockComponentBuilder::SwitchToRefineGrid(FDftGridParams const &NewGridParams)
{
   IR_SUPPRESS_UNUSED_WARNING(NewGridParams);
   bool const
      GridWasSwitched = false;
   return GridWasSwitched;
}


// Forms the integral matrix set (\mu\nu|F) for a single fitting shell ShF.
//
// \mu runs over the functions of pOrbBasisA, \nu over those of pOrbBasisB. If both
// pointers are identical, symmetric integrals are assumed [(\mu\nu|F) = (\nu\mu|F)]:
// only shell pairs with iShA >= iShB are evaluated and each value is stored at both
// (\mu,\nu) and (\nu,\mu).
//
// Parameters:
//   ShF        - the fitting shell F; contributes ShF.nFn() functions.
//   pIntKernel - integral kernel forwarded to ir::EvalInt2e3c.
//   pOrbBasisA, pOrbBasisB - raw basis sets supplying \mu and \nu.
//   Mem        - stack for the result and for temporary integral batches.
//   pFitBasis, ScrDen, fThr - currently unused. They belonged to a density-based
//                pre-screening of shell pairs and a diagnostic print-out, both
//                of which are disabled.
//
// Returns a block of nAoA * nAoB * nFnF doubles allocated on Mem via ClearAlloc.
// Element (\mu,\nu,F) is stored at index \mu + nAoA*(\nu + nAoB*F). The caller is
// responsible for releasing it with Mem.Free() (as the caller in this file does).
//
// Shell pairs rejected by the range screening below are not written at all; they
// keep whatever ClearAlloc initialized them with (presumably zero).
double *FormIntMNF(ir::FRawShell const &ShF,
   ir::FIntegralKernel *pIntKernel, FRawBasis const *pOrbBasisA, FRawBasis const *pOrbBasisB, FRawBasis const *pFitBasis, FMemoryStack &Mem, FMatrixView ScrDen, double fThr)
{
   IR_SUPPRESS_UNUSED_WARNING(pFitBasis);
   IR_SUPPRESS_UNUSED_WARNING(ScrDen);
   IR_SUPPRESS_UNUSED_WARNING(fThr);

   size_t const
      nAoA = pOrbBasisA->nFn(),
      nAoB = pOrbBasisB->nFn(),
      nFnF = ShF.nFn();
   bool const
      Symmetric = (pOrbBasisA == pOrbBasisB);
   double
      *pMNF;
   Mem.ClearAlloc(pMNF, nAoA * nAoB * nFnF);

   for ( size_t iShB = 0; iShB < pOrbBasisB->Shells.size(); ++ iShB ){
      ir::FRawShell const &ShB = pOrbBasisB->Shells[iShB];
      size_t const
         nFnB = ShB.nFn(),
         // offset of the first function of shell B within basis B. In the symmetric
         // case both basis pointers are identical, so this is also its offset in basis A.
         iFnB = pOrbBasisB->iFn(iShB);
      // symmetric case: restrict to iShA >= iShB; the other triangle is filled by transposition.
      size_t const
         iShA_First = Symmetric ? iShB : 0;
      for ( size_t iShA = iShA_First; iShA < pOrbBasisA->Shells.size(); ++ iShA ) {
         ir::FRawShell const &ShA = pOrbBasisA->Shells[iShA];
         size_t const
            nFnA = ShA.nFn(),
            iFnA = pOrbBasisA->iFn(iShA);

         // range screening: if both shells carry range information and their centers
         // are further apart than the sum of their maximum ranges, skip the pair.
         double
            fDistSqAB = DistSq(FVector3(ShA.vCen), FVector3(ShB.vCen));
         if (ShA.pRange && ShB.pRange && sqr(ShA.MaxCoRange() + ShB.MaxCoRange()) < fDistSqAB)
            continue;

         // layout of the temporary batch: pIntData[iA + nFnA * (iB + nFnB * iF)]
         size_t
            Strides[3] = {1, nFnA, nFnA * nFnB};
         double
            *pIntData;
         Mem.Alloc(pIntData, nFnA * nFnB * nFnF );

         RESUME_CLOCK(100)
         ir::EvalInt2e3c(pIntData, Strides, &ShA, &ShB, &ShF,1, 1.0, pIntKernel, Mem);
         PAUSE_CLOCK(100)

         if (Symmetric) {
            assert(pOrbBasisA == pOrbBasisB && nAoA == nAoB);
            for ( size_t iF = 0; iF < nFnF; ++ iF )
               for ( size_t iB = 0; iB < nFnB; ++ iB )
                  for ( size_t iA = 0; iA < nFnA; ++ iA ) {
                     double
                        f = pIntData[iA + nFnA * (iB + nFnB * iF)];
                     // assign to (\mu\nu| and (\nu\mu|. (int has perm symmetry).
                     pMNF[(iFnA + iA) + nAoA*(iFnB + iB) + nAoA*nAoA*iF] = f;
                     pMNF[(iFnB + iB) + nAoA*(iFnA + iA) + nAoA*nAoA*iF] = f;
                  }
         } else {
            for ( size_t iF = 0; iF < nFnF; ++ iF )
               for ( size_t iB = 0; iB < nFnB; ++ iB )
                  for ( size_t iA = 0; iA < nFnA; ++ iA ) {
                     double
                        f = pIntData[iA + nFnA * (iB + nFnB * iF)];
                     pMNF[(iFnA + iA) + nAoA*(iFnB + iB) + nAoA*nAoB*iF] = f;
                  }
         }

         Mem.Free(pIntData);
      }
   }

   return pMNF;
}


// Single-basis convenience overload: \mu and \nu both come from pOrbBasis.
// Forwards to the two-basis version with the same pointer for both sides, which
// makes that version take its symmetric code path. All other arguments are
// passed through unchanged, and the returned block is the one produced there.
double *FormIntMNF(ir::FRawShell const &ShF,
   ir::FIntegralKernel *pIntKernel, FRawBasis const *pOrbBasis, FRawBasis const *pFitBasis, FMemoryStack &Mem, FMatrixView ScrDen, double fThr)
{
   FRawBasis const
      *pBasisMu = pOrbBasis,
      *pBasisNu = pOrbBasis;
   double
      *pMNF = FormIntMNF(ShF, pIntKernel, pBasisMu, pBasisNu, pFitBasis, Mem, ScrDen, fThr);
   return pMNF;
}


// Sets up a density-fitted Coulomb/xc Fock builder with cached integrals.
//
// Stores the JK options, the grid parameters and the functional name, and resets
// the energy and electron counters. If a functional name was given, the functional
// object is created, published through the global g_pXcFunctional, kept in pXcFn,
// and its description is written to the log. With an empty name no functional is
// created here.
//
// AuxExpandXc is switched on unconditionally; AccFock then takes its
// auxiliary-expansion xc branch rather than the orbital-basis one.
FFockComponentBuilderDfCoulXcCached::FFockComponentBuilderDfCoulXcCached(FDfJkOptions const &JkOptions_, FDftGridParams const &GridParams_, std::string const &XcFunctionalName_, FLog &Log_, FTimerSet *pTimers_)
   : FFockComponentBuilder(Log_, pTimers_), XcFunctionalName(XcFunctionalName_), JkOptions(JkOptions_), GridParams(GridParams_)
{
   // accumulators/flags first; these do not depend on the functional.
   EnergyCoulomb = 0.;
   EnergyXc = 0.;
   fElecTotal = 0.;
   AuxExpandXc = true;   // alternative: false (xc evaluated in the orbital basis)

   bool const
      HaveFunctional = !XcFunctionalName.empty();
   if (HaveFunctional) {
      g_pXcFunctional = new FXcFunctional(XcFunctionalName);
      pXcFn = g_pXcFunctional;
      m_Log.Write("\n"+pXcFn->Desc());
   }
}

// Destructor; performs no explicit clean-up of its own.
FFockComponentBuilderDfCoulXcCached::~FFockComponentBuilderDfCoulXcCached()
{}

// Prepares the builder for a given wave function, orbital basis and atom set.
//
// Steps, in order:
//   1. Store the wave function declaration, the atom set and the orbital basis;
//      initialize fElecTotalAnalytic with the declared electron number.
//   2. Create the fitting basis (BASIS_JFit) and (re-)build the DFT grid via
//      SwitchToRefineGrid(GridParams).
//   3. Build the nFit x nFit Coulomb fitting metric Jcd on Mem and replace it by
//      its Cholesky factors.
//   4. Compute and cache all three-index integrals: for each fitting function, the
//      (\mu\nu| matrix is reduced to triangular storage (nAo*(nAo+1)/2 entries) and
//      stored as one column of Int3ix. The loop over fitting shells runs under
//      OpenMP with one memory stack per thread.
//   5. Log the timings and the size of the cached integrals.
//
// Options_ is not used.
//
// NOTE(review): Jcd is allocated on the caller's stack Mem by MakeStackMatrix and
// is not released in this function; presumably Mem must not be unwound below this
// point for as long as the builder is in use -- confirm with the callers.
void FFockComponentBuilderDfCoulXcCached::Init(FWfDecl const &WfDecl_, FBasisSet *pOrbBasis_, FAtomSet const &Atoms_, FHfOptions const &Options_, FMemoryStack &Mem)
{
   m_WfDecl = WfDecl_;
   // starting value only; AccFock overwrites this with the electron number of the fitted density.
   fElecTotalAnalytic = double(WfDecl_.nElec);

   pAtoms = &Atoms_;
   pOrbBasis = pOrbBasis_;
   nAo = pOrbBasis->nFn();

   // the JkFit alternative in the else-branch is compiled but disabled.
   if (1) {
      pFitBasis = new FBasisSet(*pAtoms, BASIS_JFit);
   } else {
      pFitBasis = new FBasisSet(*pAtoms, BASIS_JkFit);
      m_Log.Write(" NOTE: Using JkFit basis as JFit. Fix this!");
   }

   // builds the DFT grid if a functional is set (needs pFitBasis/pOrbBasis/pAtoms from above).
   SwitchToRefineGrid(GridParams);
//    if (pXcFn) {
//       // find minimum L which should be integrated exactly. Depends on the basis to some degree.
//       // (this is further adjusted inside the grid generator based on atom types)
//       GridParams.iMinL = AuxExpandXc? (2*pFitBasis->nMaxL()) : (2*pOrbBasis->nMaxL());
// //       FTimer
// //          tDftGrid;
//       pDftGrid = new FDftGrid(Atoms_, GridParams, &m_Log);
// //       m_Log.Write(" Generated DFT grid with {} points for {} atoms in {:.2} sec.\n", pDftGrid->Points.size(), Atoms_.size(), (double)tDftGrid);
// //       xout << "" << format(pTimingFmt) % "DFT integration grid" % (double)tDftGrid; xout.flush();
//    }

   // Make the Coulomb fitting metric (A|B) and factorize it (Cholesky).
   // Make1ixDensity and AccFock later solve linear systems with these factors.
   FTimer TimerJcd;
//    xout << *pFitBasis;
   pFitBasisRaw = pFitBasis->pRawBasis.get();
   pOrbBasisRaw = pOrbBasis->pRawBasis.get();

   nFit = pFitBasis->nFn();
   Jcd = MakeStackMatrix(nFit, nFit, Mem);
   MakeIntMatrix(Jcd, *pFitBasis, *pFitBasis, FKrn2i_Direct(&ir::g_IrCoulombKernel), Mem);
   CalcCholeskyFactors(Jcd);
   m_Log.WriteTiming("fitting metric (coul)", (double)TimerJcd);

   // make the cached integrals themselves.
   // storage: nAoTr x nFit, i.e., one triangular-packed (\mu\nu| matrix per fitting function.
   size_t
      nAoTr = nAo * (nAo+1)/2;
   Int3ixStorage.resize(nAoTr * nFit);
   Int3ix = FMatrixView(&Int3ixStorage[0], nAoTr, nFit);

   FTimer Timer3ix;
   // default-constructed dummy; FormIntMNF does not use its screening-density argument.
   FMatrixView ScrDen;
   {
      // provides the per-thread stacks handed out by GetStackOfThread() below.
      FMemoryStackArray MemStacks(Mem);
      #pragma omp parallel for schedule(dynamic)
      for ( int iShF__ = 0; iShF__ < int(pFitBasisRaw->Shells.size()); ++ iShF__ ) {
         size_t iShF = size_t(iShF__); // that one's for OpenMP.
      //for ( size_t iShF = 0; iShF < pFitBasisRaw->Shells.size(); ++ iShF ) {
         // skip the remaining work once the log reports a problem; the status is
         // checked again (and may be turned into an exception) after the loop.
         if (m_Log.StatusOkay()) {
            FMemoryStack &Mem1 = MemStacks.GetStackOfThread();
            ir::FRawShell const &ShF = pFitBasisRaw->Shells[iShF];
            size_t nFnF = ShF.nFn();
//             FMemoryStack2
//                // FIXME: this is a hack around the workspace memory size computation...
//                // the *much* better alternative would be to do this op with less space.
//                Mem1(nAo * nAo * nFnF * sizeof(double) + 100000);

            double
               // (\mu\nu|F): nAo x nAo x nFnF
               *pMNF = FormIntMNF(ShF, &ir::g_IrCoulombKernel, pOrbBasisRaw, pFitBasisRaw, Mem1, ScrDen, 0.);

            // pack each nAo x nAo slice into the Int3ix column of its fitting function.
            for (size_t iF = 0; iF < nFnF; ++ iF) {
               FMatrixView
                  MN(pMNF + nAo*nAo*iF, nAo, nAo);
               MN.TriangularReduce(1, &Int3ix(0, iF + pFitBasisRaw->iFn(iShF)));

               assert(MN.TriangularStorageSize1() == nAoTr);
            }

            Mem1.Free(pMNF);
         }
      }
   }
   m_Log.CheckStatus(); // may raise exception.

   // size of the cached integrals in MB (1 MB = 2^20 bytes), for the log only.
   double
      fIntSizeMb = double(Int3ix.GetStridedSize()) * double(sizeof(Int3ix[0])) / double(1<<20);
//    xout << format(pTimingFmt) % str(format("3-index integrals (%.2f MB)") % fIntSizeMb) % (double)Timer3ix; xout.flush();
//    xout << format(pTimingFmt) % str(format("3-index integrals (%i MB)") % (size_t)fIntSizeMb) % (double)Timer3ix; xout.flush();
   m_Log.WriteTiming(fmt::format("3-index integrals ({} MB)", (size_t)fIntSizeMb), (double)Timer3ix);
   IR_SUPPRESS_UNUSED_WARNING(Options_);
   IR_SUPPRESS_UNUSED_WARNING(WfDecl_);
}


// Fits the total orbital density in the auxiliary (fitting) basis.
//
// Inputs:
//   COccC, COccO - occupied orbital coefficient matrices; COccO is only included
//                  if it has at least one column.
//   Mem          - stack for the temporary nAo x nAo density matrix.
// Outputs (both must provide room for nFit entries):
//   jgamma - (A|rs) gamma[r,s]: the cached three-index integrals contracted with
//            the density matrix.
//   AuxDen - (A|B)^{-1} jgamma[B]: the fitted density coefficients, obtained by
//            solving with the Cholesky factors of the fitting metric Jcd.
//
// fixme: this should probably make separate open- and closed-shell densities.
// open-shell needed for xc (but not for coulomb).
void FFockComponentBuilderDfCoulXcCached::Make1ixDensity(FMatrixView jgamma, FMatrixView AuxDen, FMatrixView COccC, FMatrixView COccO, FMemoryStack &Mem)
{
   FStackMatrix
      Density(nAo, nAo, &Mem); // OrbC x OrbC.T + OrbO x OrbO.T
   m_pTimers->Resume(0x201, "DF-J/XC (RDM)");
   Mxm(Density, COccC, Transpose(COccC));
   if (COccO.nCols != 0)
      Mxm(Density, COccO, Transpose(COccO), MXM_Add);

   // reduce to triangular form; multiply off diagonal elements by 2.0. The factor
   // accounts for each off-diagonal pair occurring only once in the triangular
   // storage which Int3ix uses.
   Density.TriangularReduce(1, 0, 2.);
   m_pTimers->Pause(0x201, "DF-J/XC (RDM)");

   m_pTimers->Resume(0x202, "DF-J/XC (1x density fit)");
   Mxva(jgamma.pData, Transpose(Int3ix), Density.pData);

   Move(AuxDen, jgamma);

   //   Solve[Jcd[AB] D[\nu B i]] -> D[\nu A I]  to get density coefficients
   CholeskySolve(AuxDen, Jcd);
   // same id *and* same label as the Resume above (the label used to differ).
   m_pTimers->Pause(0x202, "DF-J/XC (1x density fit)");
}

// Adds the density-fitted Coulomb contribution and, if a functional is set, the
// exchange-correlation contribution to the closed-shell Fock matrix FockC.
//
// Outline:
//   1. Consistency checks (base class check, basis identity, dimensions).
//   2. Fit the orbital density in the auxiliary basis (Make1ixDensity): jgamma, AuxDen.
//   3. If AuxExpandXc: compute the electron number of the fitted density analytically
//      (fElecTotalAnalytic).
//   4. Keep a copy of the fitted density in m_LastDensity.
//   5. Coulomb energy: EnergyCoulomb = .5 * <AuxDen, jgamma>.
//   6. xc with auxiliary expansion (pXcFn && AuxExpandXc): evaluate the xc potential
//      in the fitting basis from the fitted density, solve it with the Cholesky
//      factors of Jcd, and add the result to AuxDen.
//   7. Contract the cached three-index integrals with AuxDen into the triangular
//      matrix FockTr (j, or j + vxc in the auxiliary expansion case).
//   8. xc without auxiliary expansion (pXcFn && !AuxExpandXc): evaluate xc directly
//      in the orbital basis, using FockTr as the target.
//   9. Energy = EnergyCoulomb + EnergyXc; expand FockTr to square form and add it
//      to FockC.
//
// FockO is only passed to the base-class check; nothing is added to it here.
// Throws std::runtime_error if pBasis is not the orbital basis given to Init or if
// the orbital/fitting dimensions are inconsistent.
//
// NOTE(review): the dimension check below requires OrbO.nRows == nAo, whereas the
// base-class check also accepts an OrbO without data -- confirm which one is intended.
void FFockComponentBuilderDfCoulXcCached::AccFock(FMatrixView &FockC, FMatrixView &FockO, FBasisSet *pBasis, FMatrixView const &OrbC, FMatrixView const &OrbO, uint Flags, FMemoryStack &Mem)
{
   FFockComponentBuilder::AccFock(FockC, FockO, pBasis, OrbC, OrbO, Flags, Mem); // consistency checks.

//    if (1) {
//       xout << "CALLED GEMMV-INIT! -- CALL" << std::endl;
//       double a[2] = {0}, b[4] = {0}, c[2] = {0};
//       // a little hack around a thread-init problem in eigen...  to prevent that the first call of DGEMV
//       // happens inside a OpenMP block. This collides with the static initializations done in eigen's version
//       // of those routines, which are not quite threadsafe (crashes in -O3 mode due to init flag re-arrangements by the compiler).
//       Mxv(&a[0], 1, &b[0],1,2, &c[0],1, 2,2);
//       xout << "CALLED GEMMV-INIT! -- DONE" << std::endl;
//    }

   // the cached integrals exist for the orbital basis from Init only; other bases
   // (as in the disabled Fock-projected guess below) are rejected.
   if (pBasis != pOrbBasis)
      throw std::runtime_error("FFockComponentBuilderDfCoulXcCached must now be used with orb-projected initial guess (not Fock-projected guess!)");
//    if (pBasis != pOrbBasis) {
//       m_pTimers->Enter(0x201, "DF-J/XC (Guess)");
//       // hm hm. This might be a problem since we project Fock matrices...
//       // (note: not sure if this actually *accumulates* matrices, instead of overwriting them)
//       FockC.Clear();
//       MakeCoul(FockC, &ir::g_IrCoulombKernel, &*pBasis->pRawBasis, &*pFitBasis->pRawBasis, OrbC, OrbO, Jcd, Mem);
//
//       if (pXcFn) {
//          using namespace dfti;
//
//          size_t nGu = pBasis->nFn();
//
//          FStackMatrix
//             DenC(nGu, nGu, &Mem);
//          Mxm(DenC, OrbC, Transpose(OrbC));
//          DenC.TriangularReduce(1, 0, 1.);
//
//          FStackMatrix
//             FockTr(nGu, nGu, &Mem);
//          FockTr.Clear();
//
//          EnergyXc = 0.;
//          FDftiArgs DftiArgs = {
//             DFTI_MakeXc,
//             &EnergyXc, 0, FockTr.pData, 0,
//             DenC.pData, 0, 0, 0, 0, 0, // no orbitals and no open-shell density provided.
//             pBasis->pRawBasis.get(),
//             pXcFn.get(),
//             pDftGrid.get(),
//             g_fThrOrb, MakeLogThrOrb(g_fThrOrb), // 1e-1 * ThrDen?
//          };
//          AccXc(DftiArgs, m_Log, Mem);
//          FockTr.TriangularExpand();
//          Add(FockC, FockTr, 1.0);
//          xout << format(pResultFmt) % "Density functional" % EnergyXc;
//       }
//
//       m_pTimers->Leave(0x201, "DF-J/XC (Guess)");
//       return;
//    }

   if ( nAo != OrbC.nRows || nAo != OrbO.nRows || nFit != Jcd.nRows )
      throw std::runtime_error("AccFock: Input orbitals not consistent with orbital basis set.");

   m_pTimers->Enter(0x200, "DF-J/XC");

   FStackMatrix
      jgamma(nFit, 1, &Mem),   // (A|rs) gamma[r,s].
      AuxDen(nFit, 1, &Mem);   // (A|B)^{-1} jgamma[B]
   Make1ixDensity(jgamma, AuxDen, OrbC, OrbO, Mem);

   if (1 && AuxExpandXc) {
      // compute number of electrons in the auxiliary basis. Since we fit densities,
      // this number may not be exact. Note that only s functions carry electrons.
      // Mathematica says: Assuming[\[Alpha] > 0, Integrate[Exp[-\[Alpha]*r^2]*r^2*4*Pi, {r, 0, Infinity}]] = (pi/alpha)^(3/2)
      fElecTotalAnalytic = 0.;
      for (size_t iSh = 0; iSh < pFitBasisRaw->Shells.size(); ++ iSh) {
         ir::FRawShell const &Sh = pFitBasisRaw->Shells[iSh];
         if (Sh.l != 0) continue;
         for (size_t iExp = 0; iExp < Sh.nExp; ++ iExp) {
            // integral of the primitive exp(-alpha r^2) over all space.
            double fDen = std::pow(M_PI/Sh.pExp[iExp], 1.5);
            // fit coefficients of this shell's contractions (one function per contraction for l == 0).
            double *pCoeff = &AuxDen[pFitBasisRaw->iFn(iSh)];
            for (size_t iCo = 0; iCo < Sh.nCo; ++ iCo)
               fElecTotalAnalytic += pCoeff[iCo] * Sh.pCo[iExp + Sh.nExp*iCo] * fDen;
         }
      }
   }


   // remember the fitted density coefficients of this call (pure density, before
   // any xc potential is added to AuxDen below).
   if (1) {
      m_LastDensity.resize(nFit);
      assert(sizeof(AuxDen[0]) == sizeof(m_LastDensity[0]));
      memcpy(&m_LastDensity[0], &AuxDen[0], sizeof(AuxDen[0]) * nFit);
   }


//       Transpose(AuxDen).Print(xout, "aux density (iters)");
//    FStackMatrix
//       Density(nAo, nAo, &Mem), // OrbC x OrbC.T + OrbO x OrbO.T
//       jgamma(nFit, 1, &Mem),   // (A|rs) gamma[r,s].
//       AuxDen(nFit, 1, &Mem);   // (A|B)^{-1} jgamma[B]
//    m_pTimers->Resume(0x201, "DF-J/XC (RDM)");
//    Mxm(Density, OrbC, Transpose(OrbC));
//    Mxm(Density, OrbO, Transpose(OrbO), MXM_Add);
//
//    Density.TriangularReduce(1, 0, 2.); // reduce to triangular form; multiply off diagonal elements by 2.0
//    m_pTimers->Pause(0x201, "DF-J/XC (RDM)");
//
//    m_pTimers->Resume(0x202, "DF-J/XC (1x density fit)");
//    Mxva(jgamma.pData, Transpose(Int3ix), Density.pData);
//
//    Move(AuxDen, jgamma);
//
//    //   Solve[Jcd[AB] D[\nu B i]] -> D[\nu A I]  to get density coefficients
//    CholeskySolve(AuxDen, Jcd);
//    m_pTimers->Pause(0x202, "DF-J/XC (Density fit)");

   // NOTE(review): AuxDen1 is only read inside the disabled `if (0)` debug block
   // further down; with that block off, this copy is unused.
   FStackMatrix
      AuxDen1(nFit, 1, &Mem);
   Move(AuxDen1, AuxDen);

   if (1) {
      m_pTimers->Resume(0x203, "DF-J/XC (coul. energy)");
      // make coulomb energy from 2ix integrals:
      // Ecoulomb = .5 j[A] (A|B) j[B]
      // (since we use a robust fit this is equal to the density matrix contraction.
      // the advantage here is that we can do it also in the xc auxiliary expansion case,
      // where we will not get a pure j matrix).
//       FStackMatrix
//          cgamma(nFit, 1, &Mem);
//       Move(cgamma, jgamma);
//       TriangularMxm(Transpose(cgamma), Jcd, 'R');
//       EnergyCoulomb = .5 * Dot2(cgamma.pData, cgamma.pData, nFit);
      EnergyCoulomb = .5 * Dot(AuxDen.pData, jgamma.pData, nFit);
      m_pTimers->Pause(0x203, "DF-J/XC (coul. energy)");
   }

   // xc via auxiliary expansion: the functional is evaluated for the fitted density
   // (fitting basis), and the resulting potential is folded into AuxDen so that the
   // single contraction with Int3ix below yields j + vxc.
   if (pXcFn && AuxExpandXc) {
      m_pTimers->Resume(0x204, "DF-J/XC (xc contrib.)");
      using namespace dfti;
      FStackMatrix
         vxc(nFit, 1, &Mem),
         AuxDenRenorm(nFit, 1, &Mem);

      vxc.Clear();
      // NOTE(review): despite its name, AuxDenRenorm is a plain copy of AuxDen in
      // the visible code; no renormalization is applied here.
      Move(AuxDenRenorm, AuxDen);


      FDftiArgs DftiArgs = {
         DFTI_MakeXc | DFTI_AuxiliaryExpand,
         &EnergyXc, 0, vxc.pData, 0,
         AuxDenRenorm.pData, 0, 0, 0, 0, 0, // no orbitals and no open-shell density provided.
         pFitBasisRaw,
         pXcFn.get(),
         pDftGrid.get(),
         g_fThrOrb, MakeLogThrOrb(g_fThrOrb), 0, &fElecTotal // 1e-1 * ThrDen?
      };
//       xout << "ACC-XC: ENTER" << std::endl;
      AccXc(DftiArgs, m_Log, Mem);


      //       xout << "ACC-CX: LEAVE" << std::endl;
//       vxc.Print(xout, "AUXILIARY XC POTENTIAL (pure, pre-J^1).");
      // disabled debug output of the auxiliary xc potential.
      if (0) {
         FStackMatrix
            vxcd(nFit, 1, &Mem),
            Scd(nFit, nFit, &Mem);
         MakeIntMatrix(Scd, *pFitBasis, *pFitBasis, FKrn2i_Direct(&ir::g_IrOverlapKernel), Mem);
//          CalcCholeskyFactors(Scd);
         Move(vxcd, vxc);
//          CholeskySolve(vxcd, Scd);
//          Mxm(vxcd, Scd, vxc);
         vxcd.Print(xout, "AUXILIARY XC POTENTIAL (S^-1).");
         xout << format(" <vxc,auxden> = %18.12f\n") % Dot(vxcd.pData, AuxDen1.pData, nFit);
      }
      // turn the potential into density-like coefficients (solve with the Coulomb
      // metric) and add them to the fitted density.
      CholeskySolve(vxc, Jcd);
      Add(AuxDen, vxc);
//       AuxDen1.Print(xout, "AUXILIARY DENSITY.");
//       vxc.Print(xout, "DENSITY-LIKE XC POTENTIAL.");
//       xout << format(pResultFmt) % "Density functional" % DfuEnergy;
//       xout << format(pResultFmt) % "Functional energy" % DfuEnergy;
//       EnergyXc
      m_pTimers->Pause(0x204, "DF-J/XC (xc contrib.)");
   }


   // contract the cached integrals with the (possibly xc-augmented) auxiliary
   // coefficients; the result is in triangular storage at the start of FockTr.
//    FormIntMNF_ContractF(Coul, pIntKernel, pOrbBasis, pFitBasis, AuxDen.pData, Mem);
   FStackMatrix
      FockTr(nAo, nAo, &Mem);

   m_pTimers->Resume(0x205, "DF-J/XC (j/vxc matrix)");
   Mxva(FockTr.pData, Int3ix, AuxDen.pData);
   m_pTimers->Pause(0x205, "DF-J/XC (j/vxc matrix)");

   // coulomb energy.
//    EnergyCoulomb = .5 * Dot(Density.pData, FockTr.pData, FockTr.TriangularStorageSize1());


//    FockTr.Clear(); // FIXME: REMOVE THIS.
   // xc evaluated directly in the orbital basis (only if the auxiliary expansion is off).
   if (pXcFn && !AuxExpandXc) {
      m_pTimers->Resume(0x204, "DF-J/XC (xc contrib.)");
      using namespace dfti;
      FStackMatrix
         DenC(nAo, nAo, &Mem);
      Mxm(DenC, OrbC, Transpose(OrbC));
      DenC.TriangularReduce(1, 0, 1.);
      // NOTE(review): OccOrbC is allocated and filled here, but it is not referenced
      // by DftiArgs or anything else in the visible code.
      FMatrixView
         OccOrbC(0,0,0); // note: nOcc x nAo with absorbed occupation numbers.
      OccOrbC = MakeStackMatrix(OrbC.nCols, OrbC.nRows, Mem);
      Move(OccOrbC, Transpose(OrbC));

      FDftiArgs DftiArgs = {
         DFTI_MakeXc,
         &EnergyXc, 0, FockTr.pData, 0,
         DenC.pData, 0, 0, 0, 0, 0, // no orbitals and no open-shell density provided.
         pOrbBasisRaw,
         pXcFn.get(),
         pDftGrid.get(),
         g_fThrOrb, MakeLogThrOrb(g_fThrOrb), 0, &fElecTotal // 1e-1 * ThrDen?
      };
      AccXc(DftiArgs, m_Log, Mem);
//       xout << format(pResultFmt) % "Density functional" % DfuEnergy;
//       xout << format(pResultFmt) % "Functional energy" % DfuEnergy;
      m_pTimers->Pause(0x204, "DF-J/XC (xc contrib.)");
   }

   Energy = EnergyCoulomb + EnergyXc;

   // unpack to a full nAo x nAo matrix and accumulate into the closed-shell Fock matrix.
   FockTr.TriangularExpand();
//    FockTr.Print(xout, "FOCK + XC");
//    throw std::runtime_error("absicht.");
   Add(FockC, FockTr, 1.0);

   m_pTimers->Leave(0x200, "DF-J/XC");
}

// Writes the energy contributions of this builder to the log:
//   - "Lost electrons": the difference between fElecTotal (handed to the xc
//     integration as an output slot in AccFock) and fElecTotalAnalytic (the
//     analytically integrated electron number of the fitted density).
//   - the Coulomb energy and the density functional energy of the last AccFock call.
void FFockComponentBuilderDfCoulXcCached::PrintEnergyContribs()
{
   double const
      fLostElec = fElecTotal - fElecTotalAnalytic;
   m_Log.Write(" {:<32}{:18.4e}\n", "Lost electrons", fLostElec);

   m_Log.WriteResult("Coulomb energy", EnergyCoulomb);
   m_Log.WriteResult("Density functional energy", EnergyXc);
}

// Gradient hook of the cached DF-J/XC builder. This is currently a stub: it
// overrides the base-class version (which logs a "not implemented" note) and
// silently adds nothing to Gradient.
//
// All parameters are unused; they are marked as such in the same way as in the
// other stubs of this file.
void FFockComponentBuilderDfCoulXcCached::AccGradient(FMatrixView Gradient, FMatrixView COccC, FMatrixView COccO, FMemoryStack &Mem)
{
//    Gradient.Clear(); // FIXME: REMOVE THIS.
   IR_SUPPRESS_UNUSED_WARNING(Gradient);
   IR_SUPPRESS_UNUSED_WARNING(COccC);
   IR_SUPPRESS_UNUSED_WARNING(COccO);
   IR_SUPPRESS_UNUSED_WARNING(Mem);
}


// Adopts NewGridParams and, if a functional is set, generates a new DFT
// integration grid for the stored atom set, replacing pDftGrid.
// Always returns true (whether or not a grid was built).
//
// Relies on pAtoms, pFitBasis and pOrbBasis; Init sets these before it calls
// this function.
bool FFockComponentBuilderDfCoulXcCached::SwitchToRefineGrid(FDftGridParams const &NewGridParams)
{
   GridParams = NewGridParams;
   if (pXcFn) {
      // minimum L which should be integrated exactly; depends on the basis to some
      // degree (and is further adjusted inside the grid generator based on atom types).
      // With auxiliary expansion, the plain max. L of the fitting basis is used
      // (an earlier variant used twice that value); otherwise twice the max. L of
      // the orbital basis.
      if (AuxExpandXc)
         GridParams.iMinL = pFitBasis->nMaxL();
      else
         GridParams.iMinL = 2*pOrbBasis->nMaxL();

      pDftGrid = new FDftGrid(*pAtoms, GridParams, &m_Log);
   }
   return true;
}


} // namespace ct