1#!/usr/bin/env python
2# Copyright 2014-2019,2021 The PySCF Developers. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# Author: Qiming Sun <osirpt.sun@gmail.com>
17#
18
19'''
20Density fitting
21
22Divide the 3-center Coulomb integrals to two parts.  Compute the local
23part in real space, long range part in reciprocal space.
24
25Note when diffuse functions are used in fitting basis, it is easy to cause
26linear dependence (non-positive definite) issue under PBC.
27
28Ref:
29J. Chem. Phys. 147, 164119 (2017)
30'''
31
32import os
33
34import copy
35import ctypes
36import warnings
37import tempfile
38import numpy
39import h5py
40import scipy.linalg
41from pyscf import lib
42from pyscf import gto
43from pyscf.lib import logger
44from pyscf.df import addons
45from pyscf.df.outcore import _guess_shell_ranges
46from pyscf.pbc.gto.cell import _estimate_rcut
47from pyscf.pbc import tools
48from pyscf.pbc.df import outcore
49from pyscf.pbc.df import ft_ao
50from pyscf.pbc.df import aft
51from pyscf.pbc.df import df_jk
52from pyscf.pbc.df import df_ao2mo
53from pyscf.pbc.df.aft import estimate_eta, get_nuc
54from pyscf.pbc.df.df_jk import zdotCN
55from pyscf.pbc.lib.kpts_helper import (is_zero, gamma_point, member, unique,
56                                       KPT_DIFF_TOL)
57from pyscf.pbc.df.aft import _sub_df_jk_
58from pyscf import __config__
59
60LINEAR_DEP_THR = getattr(__config__, 'pbc_df_df_DF_lindep', 1e-9)
61LONGRANGE_AFT_TURNOVER_THRESHOLD = 2.5
62
63
64def make_modrho_basis(cell, auxbasis=None, drop_eta=None):
65    auxcell = addons.make_auxmol(cell, auxbasis)
66
67# Note libcint library will multiply the norm of the integration over spheric
68# part sqrt(4pi) to the basis.
69    half_sph_norm = numpy.sqrt(.25/numpy.pi)
70    steep_shls = []
71    ndrop = 0
72    rcut = []
73    _env = auxcell._env.copy()
74    for ib in range(len(auxcell._bas)):
75        l = auxcell.bas_angular(ib)
76        np = auxcell.bas_nprim(ib)
77        nc = auxcell.bas_nctr(ib)
78        es = auxcell.bas_exp(ib)
79        ptr = auxcell._bas[ib,gto.PTR_COEFF]
80        cs = auxcell._env[ptr:ptr+np*nc].reshape(nc,np).T
81
82        if drop_eta is not None and numpy.any(es < drop_eta):
83            cs = cs[es>=drop_eta]
84            es = es[es>=drop_eta]
85            np, ndrop = len(es), ndrop+np-len(es)
86
87        if np > 0:
88            pe = auxcell._bas[ib,gto.PTR_EXP]
89            auxcell._bas[ib,gto.NPRIM_OF] = np
90            _env[pe:pe+np] = es
91# int1 is the multipole value. l*2+2 is due to the radial part integral
92# \int (r^l e^{-ar^2} * Y_{lm}) (r^l Y_{lm}) r^2 dr d\Omega
93            int1 = gto.gaussian_int(l*2+2, es)
94            s = numpy.einsum('pi,p->i', cs, int1)
95# The auxiliary basis normalization factor is not a must for density expansion.
96# half_sph_norm here to normalize the monopole (charge).  This convention can
97# simplify the formulism of \int \bar{\rho}, see function auxbar.
98            cs = numpy.einsum('pi,i->pi', cs, half_sph_norm/s)
99            _env[ptr:ptr+np*nc] = cs.T.reshape(-1)
100
101            steep_shls.append(ib)
102
103            r = _estimate_rcut(es, l, abs(cs).max(axis=1), cell.precision)
104            rcut.append(r.max())
105
106    auxcell._env = _env
107    auxcell.rcut = max(rcut)
108
109    auxcell._bas = numpy.asarray(auxcell._bas[steep_shls], order='C')
110    logger.info(cell, 'Drop %d primitive fitting functions', ndrop)
111    logger.info(cell, 'make aux basis, num shells = %d, num cGTOs = %d',
112                auxcell.nbas, auxcell.nao_nr())
113    logger.info(cell, 'auxcell.rcut %s', auxcell.rcut)
114    return auxcell
115
116def make_modchg_basis(auxcell, smooth_eta):
117    # * chgcell defines smooth gaussian functions for each angular momentum for
118    #   auxcell. The smooth functions may be used to carry the charge
119    chgcell = copy.copy(auxcell)  # smooth model density for coulomb integral to carry charge
120    half_sph_norm = .5/numpy.sqrt(numpy.pi)
121    chg_bas = []
122    chg_env = [smooth_eta]
123    ptr_eta = auxcell._env.size
124    ptr = ptr_eta + 1
125    l_max = auxcell._bas[:,gto.ANG_OF].max()
126# gaussian_int(l*2+2) for multipole integral:
127# \int (r^l e^{-ar^2} * Y_{lm}) (r^l Y_{lm}) r^2 dr d\Omega
128    norms = [half_sph_norm/gto.gaussian_int(l*2+2, smooth_eta)
129             for l in range(l_max+1)]
130    for ia in range(auxcell.natm):
131        for l in set(auxcell._bas[auxcell._bas[:,gto.ATOM_OF]==ia, gto.ANG_OF]):
132            chg_bas.append([ia, l, 1, 1, 0, ptr_eta, ptr, 0])
133            chg_env.append(norms[l])
134            ptr += 1
135
136    chgcell._atm = auxcell._atm
137    chgcell._bas = numpy.asarray(chg_bas, dtype=numpy.int32).reshape(-1,gto.BAS_SLOTS)
138    chgcell._env = numpy.hstack((auxcell._env, chg_env))
139    # _estimate_rcut is based on the integral overlap. It's likely too tight for
140    # rcut of the model charge. Using the value of functions at rcut seems enough
141    # chgcell.rcut = _estimate_rcut(smooth_eta, l_max, 1., auxcell.precision)
142    rcut = 15.
143    chgcell.rcut = (numpy.log(4*numpy.pi*rcut**2/auxcell.precision) / smooth_eta)**.5
144
145    logger.debug1(auxcell, 'make compensating basis, num shells = %d, num cGTOs = %d',
146                  chgcell.nbas, chgcell.nao_nr())
147    logger.debug1(auxcell, 'chgcell.rcut %s', chgcell.rcut)
148    return chgcell
149
150# kpti == kptj: s2 symmetry
151# kpti == kptj == 0 (gamma point): real
152def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
153    t1 = (logger.process_clock(), logger.perf_counter())
154    log = logger.Logger(mydf.stdout, mydf.verbose)
155    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
156    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
157
158    # The ideal way to hold the temporary integrals is to store them in the
159    # cderi_file and overwrite them inplace in the second pass.  The current
160    # HDF5 library does not have an efficient way to manage free space in
161    # overwriting.  It often leads to the cderi_file ~2 times larger than the
162    # necessary size.  For now, dumping the DF integral intermediates to a
163    # separated temporary file can avoid this issue.  The DF intermediates may
164    # be terribly huge. The temporary file should be placed in the same disk
165    # as cderi_file.
166    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
167    fswap = lib.H5TmpFile(swapfile.name)
168    # Unlink swapfile to avoid trash
169    swapfile = None
170
171    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
172                    kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory)
173    t1 = log.timer_debug1('3c2e', *t1)
174
175    nao = cell.nao_nr()
176    naux = auxcell.nao_nr()
177    mesh = mydf.mesh
178    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
179    b = cell.reciprocal_vectors()
180    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
181    ngrids = gxyz.shape[0]
182
183    kptis = kptij_lst[:,0]
184    kptjs = kptij_lst[:,1]
185    kpt_ji = kptjs - kptis
186    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
187
188    log.debug('Num uniq kpts %d', len(uniq_kpts))
189    log.debug2('uniq_kpts %s', uniq_kpts)
190    # j2c ~ (-kpt_ji | kpt_ji)
191    # Generally speaking, the int2c2e integrals with lattice sum applied on
192    # |j> are not necessary hermitian because int2c2e cannot be made converged
193    # with regular lattice sum unless the lattice sum vectors (from
194    # cell.get_lattice_Ls) are symmetric. After adding the planewaves
195    # contributions and fuse(fuse(j2c)), the output matrix is hermitian.
196    j2c = fused_cell.pbc_intor('int2c2e', hermi=0, kpts=uniq_kpts)
197
198    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
199    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
200    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
201    for k, kpt in enumerate(uniq_kpts):
202        coulG = mydf.weighted_coulG(kpt, False, mesh)
203        for p0, p1 in lib.prange(0, ngrids, blksize):
204            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
205            LkR = numpy.asarray(aoaux.real, order='C')
206            LkI = numpy.asarray(aoaux.imag, order='C')
207            aoaux = None
208
209            if is_zero(kpt):  # kpti == kptj
210                j2c_p  = lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
211                j2c_p += lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
212            else:
213                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
214                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
215                j2c_p = j2cR + j2cI * 1j
216            j2c[k][naux:] -= j2c_p
217            j2c[k][:naux,naux:] -= j2c_p[:,:naux].conj().T
218            j2c_p = LkR = LkI = None
219        # Symmetrizing the matrix is not must if the integrals converged.
220        # Since symmetry cannot be enforced in the pbc_intor('int2c2e'),
221        # the aggregated j2c here may have error in hermitian if the range of
222        # lattice sum is not big enough.
223        j2c[k] = (j2c[k] + j2c[k].conj().T) * .5
224        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
225    j2c = coulG = None
226
227    def cholesky_decomposed_metric(uniq_kptji_id):
228        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
229        j2c_negative = None
230        try:
231            j2c = scipy.linalg.cholesky(j2c, lower=True)
232            j2ctag = 'CD'
233        except scipy.linalg.LinAlgError:
234            #msg =('===================================\n'
235            #      'J-metric not positive definite.\n'
236            #      'It is likely that mesh is not enough.\n'
237            #      '===================================')
238            #log.error(msg)
239            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
240            w, v = scipy.linalg.eigh(j2c)
241            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
242            log.debug('cond = %.4g, drop %d bfns',
243                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
244            v1 = v[:,w>mydf.linear_dep_threshold].conj().T
245            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
246            j2c = v1
247            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
248                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
249                if len(idx) > 0:
250                    j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
251            w = v = None
252            j2ctag = 'eig'
253        return j2c, j2c_negative, j2ctag
254
255    feri = h5py.File(cderi_file, 'w')
256    feri['j3c-kptij'] = kptij_lst
257    nsegs = len(fswap['j3c-junk/0'])
258    def make_kpt(uniq_kptji_id, cholesky_j2c):
259        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
260        log.debug1('kpt = %s', kpt)
261        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
262        adapted_kptjs = kptjs[adapted_ji_idx]
263        nkptj = len(adapted_kptjs)
264        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
265
266        j2c, j2c_negative, j2ctag = cholesky_j2c
267
268        shls_slice = (auxcell.nbas, fused_cell.nbas)
269        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
270        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
271        Gaux *= wcoulG.reshape(-1,1)
272        kLR = Gaux.real.copy('C')
273        kLI = Gaux.imag.copy('C')
274        Gaux = None
275
276        if is_zero(kpt):  # kpti == kptj
277            aosym = 's2'
278            nao_pair = nao*(nao+1)//2
279
280            if cell.dimension == 3:
281                vbar = fuse(mydf.auxbar(fused_cell))
282                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
283                ovlp = [lib.pack_tril(s) for s in ovlp]
284        else:
285            aosym = 's1'
286            nao_pair = nao**2
287
288        mem_now = lib.current_memory()[0]
289        log.debug2('memory = %s', mem_now)
290        max_memory = max(2000, mydf.max_memory-mem_now)
291        # nkptj for 3c-coulomb arrays plus 1 Lpq array
292        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
293        shranges = _guess_shell_ranges(cell, buflen, aosym)
294        buflen = max([x[2] for x in shranges])
295        # +1 for a pqkbuf
296        if aosym == 's2':
297            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
298        else:
299            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
300        Gblksize = min(Gblksize, ngrids, 16384)
301
302        def load(aux_slice):
303            col0, col1 = aux_slice
304            j3cR = []
305            j3cI = []
306            for k, idx in enumerate(adapted_ji_idx):
307                v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
308                                  for i in range(nsegs)])
309                # vbar is the interaction between the background charge
310                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
311                if is_zero(kpt) and cell.dimension == 3:
312                    for i in numpy.where(vbar != 0)[0]:
313                        v[i] -= vbar[i] * ovlp[k][col0:col1]
314                j3cR.append(numpy.asarray(v.real, order='C'))
315                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
316                    j3cI.append(None)
317                else:
318                    j3cI.append(numpy.asarray(v.imag, order='C'))
319                v = None
320            return j3cR, j3cI
321
322        pqkRbuf = numpy.empty(buflen*Gblksize)
323        pqkIbuf = numpy.empty(buflen*Gblksize)
324        # buf for ft_aopair
325        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
326        cols = [sh_range[2] for sh_range in shranges]
327        locs = numpy.append(0, numpy.cumsum(cols))
328        tasks = zip(locs[:-1], locs[1:])
329        for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
330            bstart, bend, ncol = shranges[istep]
331            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
332                       istep+1, len(shranges), bstart, bend, ncol)
333            if aosym == 's2':
334                shls_slice = (bstart, bend, 0, bend)
335            else:
336                shls_slice = (bstart, bend, 0, cell.nbas)
337
338            for p0, p1 in lib.prange(0, ngrids, Gblksize):
339                dat = ft_ao.ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
340                                           b, gxyz[p0:p1], Gvbase, kpt,
341                                           adapted_kptjs, out=buf)
342                nG = p1 - p0
343                for k, ji in enumerate(adapted_ji_idx):
344                    aoao = dat[k].reshape(nG,ncol)
345                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
346                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
347                    pqkR[:] = aoao.real.T
348                    pqkI[:] = aoao.imag.T
349
350                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
351                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
352                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
353                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
354                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)
355
356            for k, ji in enumerate(adapted_ji_idx):
357                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
358                    v = fuse(j3cR[k])
359                else:
360                    v = fuse(j3cR[k] + j3cI[k] * 1j)
361                if j2ctag == 'CD':
362                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
363                    feri['j3c/%d/%d'%(ji,istep)] = v
364                else:
365                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)
366
367                # low-dimension systems
368                if j2c_negative is not None:
369                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)
370            j3cR = j3cI = None
371
372        for ji in adapted_ji_idx:
373            del(fswap['j3c-junk/%d'%ji])
374
375    # Wrapped around boundary and symmetry between k and -k can be used
376    # explicitly for the metric integrals.  We consider this symmetry
377    # because it is used in the df_ao2mo module when contracting two 3-index
378    # integral tensors to the 4-index 2e integral tensor. If the symmetry
379    # related k-points are treated separately, the resultant 3-index tensors
380    # may have inconsistent dimension due to the numerial noise when handling
381    # linear dependency of j2c.
382    def conj_j2c(cholesky_j2c):
383        j2c, j2c_negative, j2ctag = cholesky_j2c
384        if j2c_negative is None:
385            return j2c.conj(), None, j2ctag
386        else:
387            return j2c.conj(), j2c_negative.conj(), j2ctag
388
389    a = cell.lattice_vectors() / (2*numpy.pi)
390    def kconserve_indices(kpt):
391        '''search which (kpts+kpt) satisfies momentum conservation'''
392        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
393        kdif_int = numpy.rint(kdif)
394        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
395        uniq_kptji_ids = numpy.where(mask)[0]
396        return uniq_kptji_ids
397
398    done = numpy.zeros(len(uniq_kpts), dtype=bool)
399    for k, kpt in enumerate(uniq_kpts):
400        if done[k]:
401            continue
402
403        log.debug1('Cholesky decomposition for j2c at kpt %s', k)
404        cholesky_j2c = cholesky_decomposed_metric(k)
405
406        # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
407        # symmetry S = S
408        uniq_kptji_ids = kconserve_indices(-kpt)
409        log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
410        log.debug1("    make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
411        for uniq_kptji_id in uniq_kptji_ids:
412            if not done[uniq_kptji_id]:
413                make_kpt(uniq_kptji_id, cholesky_j2c)
414        done[uniq_kptji_ids] = True
415
416        # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
417        # symmetry S = S*
418        uniq_kptji_ids = kconserve_indices(kpt)
419        log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
420        log.debug1("    make_kpt for %s", uniq_kptji_ids)
421        cholesky_j2c = conj_j2c(cholesky_j2c)
422        for uniq_kptji_id in uniq_kptji_ids:
423            if not done[uniq_kptji_id]:
424                make_kpt(uniq_kptji_id, cholesky_j2c)
425        done[uniq_kptji_ids] = True
426
427    feri.close()
428
429
430class GDF(aft.AFTDF):
431    '''Gaussian density fitting
432    '''
433    def __init__(self, cell, kpts=numpy.zeros((1,3))):
434        self.cell = cell
435        self.stdout = cell.stdout
436        self.verbose = cell.verbose
437        self.max_memory = cell.max_memory
438
439        self.kpts = kpts  # default is gamma point
440        self.kpts_band = None
441        self._auxbasis = None
442
443        # Search for optimized eta and mesh here.
444        if cell.dimension == 0:
445            self.eta = 0.2
446            self.mesh = cell.mesh
447        else:
448            ke_cutoff = tools.mesh_to_cutoff(cell.lattice_vectors(), cell.mesh)
449            ke_cutoff = ke_cutoff[:cell.dimension].min()
450            eta_cell = aft.estimate_eta_for_ke_cutoff(cell, ke_cutoff, cell.precision)
451            eta_guess = estimate_eta(cell, cell.precision)
452            logger.debug3(self, 'eta_guess = %g', eta_guess)
453            if eta_cell < eta_guess:
454                self.eta = eta_cell
455                self.mesh = cell.mesh
456            else:
457                self.eta = eta_guess
458                ke_cutoff = aft.estimate_ke_cutoff_for_eta(cell, self.eta, cell.precision)
459                self.mesh = tools.cutoff_to_mesh(cell.lattice_vectors(), ke_cutoff)
460                if cell.dimension < 2 or cell.low_dim_ft_type == 'inf_vacuum':
461                    self.mesh[cell.dimension:] = cell.mesh[cell.dimension:]
462        self.mesh = _round_off_to_odd_mesh(self.mesh)
463
464        # exp_to_discard to remove diffused fitting functions. The diffused
465        # fitting functions may cause linear dependency in DF metric. Removing
466        # the fitting functions whose exponents are smaller than exp_to_discard
467        # can improve the linear dependency issue. However, this parameter
468        # affects the quality of the auxiliary basis. The default value of
469        # this parameter was set to 0.2 in v1.5.1 or older and was changed to
470        # 0 since v1.5.2.
471        self.exp_to_discard = cell.exp_to_discard
472
473        # The following attributes are not input options.
474        self.exxdiv = None  # to mimic KRHF/KUHF object in function get_coulG
475        self.auxcell = None
476        self.blockdim = getattr(__config__, 'pbc_df_df_DF_blockdim', 240)
477        self.linear_dep_threshold = LINEAR_DEP_THR
478        self._j_only = False
479# If _cderi_to_save is specified, the 3C-integral tensor will be saved in this file.
480        self._cderi_to_save = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
481# If _cderi is specified, the 3C-integral tensor will be read from this file
482        self._cderi = None
483        self._rsh_df = {}  # Range separated Coulomb DF objects
484        self._keys = set(self.__dict__.keys())
485
486    @property
487    def auxbasis(self):
488        return self._auxbasis
489    @auxbasis.setter
490    def auxbasis(self, x):
491        if self._auxbasis != x:
492            self._auxbasis = x
493            self.auxcell = None
494            self._cderi = None
495
496    def reset(self, cell=None):
497        if cell is not None:
498            self.cell = cell
499        self.auxcell = None
500        self._cderi = None
501        self._cderi_to_save = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
502        self._rsh_df = {}
503        return self
504
505    @property
506    def gs(self):
507        return [n//2 for n in self.mesh]
508    @gs.setter
509    def gs(self, x):
510        warnings.warn('Attribute .gs is deprecated. It is replaced by attribute .mesh.\n'
511                      'mesh = the number of PWs (=2*gs+1) for each direction.')
512        self.mesh = [2*n+1 for n in x]
513
514    def dump_flags(self, verbose=None):
515        log = logger.new_logger(self, verbose)
516        log.info('\n')
517        log.info('******** %s ********', self.__class__)
518        log.info('mesh = %s (%d PWs)', self.mesh, numpy.prod(self.mesh))
519        if self.auxcell is None:
520            log.info('auxbasis = %s', self.auxbasis)
521        else:
522            log.info('auxbasis = %s', self.auxcell.basis)
523        log.info('eta = %s', self.eta)
524        log.info('exp_to_discard = %s', self.exp_to_discard)
525        if isinstance(self._cderi, str):
526            log.info('_cderi = %s  where DF integrals are loaded (readonly).',
527                     self._cderi)
528        elif isinstance(self._cderi_to_save, str):
529            log.info('_cderi_to_save = %s', self._cderi_to_save)
530        else:
531            log.info('_cderi_to_save = %s', self._cderi_to_save.name)
532        log.info('len(kpts) = %d', len(self.kpts))
533        log.debug1('    kpts = %s', self.kpts)
534        if self.kpts_band is not None:
535            log.info('len(kpts_band) = %d', len(self.kpts_band))
536            log.debug1('    kpts_band = %s', self.kpts_band)
537        return self
538
539    def check_sanity(self):
540        return lib.StreamObject.check_sanity(self)
541
542    def build(self, j_only=None, with_j3c=True, kpts_band=None):
543        if self.kpts_band is not None:
544            self.kpts_band = numpy.reshape(self.kpts_band, (-1,3))
545        if kpts_band is not None:
546            kpts_band = numpy.reshape(kpts_band, (-1,3))
547            if self.kpts_band is None:
548                self.kpts_band = kpts_band
549            else:
550                self.kpts_band = unique(numpy.vstack((self.kpts_band,kpts_band)))[0]
551
552        self.check_sanity()
553        self.dump_flags()
554
555        self.auxcell = make_modrho_basis(self.cell, self.auxbasis,
556                                         self.exp_to_discard)
557
558        # Remove duplicated k-points. Duplicated kpts may lead to a buffer
559        # located in incore.wrap_int3c larger than necessary. Integral code
560        # only fills necessary part of the buffer, leaving some space in the
561        # buffer unfilled.
562        uniq_idx = unique(self.kpts)[1]
563        kpts = numpy.asarray(self.kpts)[uniq_idx]
564        if self.kpts_band is None:
565            kband_uniq = numpy.zeros((0,3))
566        else:
567            kband_uniq = [k for k in self.kpts_band if len(member(k, kpts))==0]
568        if j_only is None:
569            j_only = self._j_only
570        if j_only:
571            kall = numpy.vstack([kpts,kband_uniq])
572            kptij_lst = numpy.hstack((kall,kall)).reshape(-1,2,3)
573        else:
574            kptij_lst = [(ki, kpts[j]) for i, ki in enumerate(kpts) for j in range(i+1)]
575            kptij_lst.extend([(ki, kj) for ki in kband_uniq for kj in kpts])
576            kptij_lst.extend([(ki, ki) for ki in kband_uniq])
577            kptij_lst = numpy.asarray(kptij_lst)
578
579        if with_j3c:
580            if isinstance(self._cderi_to_save, str):
581                cderi = self._cderi_to_save
582            else:
583                cderi = self._cderi_to_save.name
584            if isinstance(self._cderi, str):
585                if self._cderi == cderi and os.path.isfile(cderi):
586                    logger.warn(self, 'DF integrals in %s (specified by '
587                                '._cderi) is overwritten by GDF '
588                                'initialization. ', cderi)
589                else:
590                    logger.warn(self, 'Value of ._cderi is ignored. '
591                                'DF integrals will be saved in file %s .',
592                                cderi)
593            self._cderi = cderi
594            t1 = (logger.process_clock(), logger.perf_counter())
595            self._make_j3c(self.cell, self.auxcell, kptij_lst, cderi)
596            t1 = logger.timer_debug1(self, 'j3c', *t1)
597        return self
598
599    _make_j3c = _make_j3c
600
601    def has_kpts(self, kpts):
602        if kpts is None:
603            return True
604        else:
605            kpts = numpy.asarray(kpts).reshape(-1,3)
606            if self.kpts_band is None:
607                return all((len(member(kpt, self.kpts))>0) for kpt in kpts)
608            else:
609                return all((len(member(kpt, self.kpts))>0 or
610                            len(member(kpt, self.kpts_band))>0) for kpt in kpts)
611
612    def auxbar(self, fused_cell=None):
613        r'''
614        Potential average = \sum_L V_L*Lpq
615
616        The coulomb energy is computed with chargeless density
617        \int (rho-C) V,  C = (\int rho) / vol = Tr(gamma,S)/vol
618        It is equivalent to removing the averaged potential from the short range V
619        vs = vs - (\int V)/vol * S
620        '''
621        if fused_cell is None:
622            fused_cell, fuse = fuse_auxcell(self, self.auxcell)
623        aux_loc = fused_cell.ao_loc_nr()
624        vbar = numpy.zeros(aux_loc[-1])
625        if fused_cell.dimension != 3:
626            return vbar
627
628        half_sph_norm = .5/numpy.sqrt(numpy.pi)
629        for i in range(fused_cell.nbas):
630            l = fused_cell.bas_angular(i)
631            if l == 0:
632                es = fused_cell.bas_exp(i)
633                if es.size == 1:
634                    vbar[aux_loc[i]] = -1/es[0]
635                else:
636                    # Remove the normalization to get the primitive contraction coeffcients
637                    norms = half_sph_norm/gto.gaussian_int(2, es)
638                    cs = numpy.einsum('i,ij->ij', 1/norms, fused_cell._libcint_ctr_coeff(i))
639                    vbar[aux_loc[i]:aux_loc[i+1]] = numpy.einsum('in,i->n', cs, -1/es)
640        # TODO: fused_cell.cart and l%2 == 0: # 6d 10f ...
641        # Normalization coefficients are different in the same shell for cartesian
642        # basis. E.g. the d-type functions, the 5 d-type orbitals are normalized wrt
643        # the integral \int r^2 * r^2 e^{-a r^2} dr.  The s-type 3s orbital should be
644        # normalized wrt the integral \int r^0 * r^2 e^{-a r^2} dr. The different
645        # normalization was not built in the basis.
646        vbar *= numpy.pi/fused_cell.vol
647        return vbar
648
649    def sr_loop(self, kpti_kptj=numpy.zeros((2,3)), max_memory=2000,
650                compact=True, blksize=None):
651        '''Short range part'''
652        if self._cderi is None:
653            self.build()
654        cell = self.cell
655        kpti, kptj = kpti_kptj
656        unpack = is_zero(kpti-kptj) and not compact
657        is_real = is_zero(kpti_kptj)
658        nao = cell.nao_nr()
659        if blksize is None:
660            if is_real:
661                blksize = max_memory*1e6/8/(nao**2*2)
662            else:
663                blksize = max_memory*1e6/16/(nao**2*2)
664            blksize /= 2  # For prefetch
665            blksize = max(16, min(int(blksize), self.blockdim))
666            logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory, blksize)
667
668        def load(aux_slice):
669            b0, b1 = aux_slice
670            if is_real:
671                LpqR = numpy.asarray(j3c[b0:b1])
672                if unpack:
673                    LpqR = lib.unpack_tril(LpqR).reshape(-1,nao**2)
674                LpqI = numpy.zeros_like(LpqR)
675            else:
676                Lpq = numpy.asarray(j3c[b0:b1])
677                LpqR = numpy.asarray(Lpq.real, order='C')
678                LpqI = numpy.asarray(Lpq.imag, order='C')
679                Lpq = None
680                if unpack:
681                    LpqR = lib.unpack_tril(LpqR).reshape(-1,nao**2)
682                    LpqI = lib.unpack_tril(LpqI, lib.ANTIHERMI).reshape(-1,nao**2)
683            return LpqR, LpqI
684
685        with _load3c(self._cderi, 'j3c', kpti_kptj, 'j3c-kptij') as j3c:
686            slices = lib.prange(0, j3c.shape[0], blksize)
687            for LpqR, LpqI in lib.map_with_prefetch(load, slices):
688                yield LpqR, LpqI, 1
689                LpqR = LpqI = None
690
691        if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
692            # Truncated Coulomb operator is not postive definite. Load the
693            # CDERI tensor of negative part.
694            with _load3c(self._cderi, 'j3c-', kpti_kptj, 'j3c-kptij',
695                         ignore_key_error=True) as j3c:
696                slices = lib.prange(0, j3c.shape[0], blksize)
697                for LpqR, LpqI in lib.map_with_prefetch(load, slices):
698                    yield LpqR, LpqI, -1
699                    LpqR = LpqI = None
700
701    weighted_coulG = aft.weighted_coulG
702    _int_nuc_vloc = aft._int_nuc_vloc
703    get_nuc = aft.get_nuc  # noqa: F811
704    get_pp = aft.get_pp
705
706    # Note: Special exxdiv by default should not be used for an arbitrary
707    # input density matrix. When the df object was used with the molecular
708    # post-HF code, get_jk was often called with an incomplete DM (e.g. the
709    # core DM in CASCI). An SCF level exxdiv treatment is inadequate for
710    # post-HF methods.
711    def get_jk(self, dm, hermi=1, kpts=None, kpts_band=None,
712               with_j=True, with_k=True, omega=None, exxdiv=None):
713        if omega is not None:  # J/K for RSH functionals
714            cell = self.cell
715            # * AFT is computationally more efficient than GDF if the Coulomb
716            #   attenuation tends to the long-range role (i.e. small omega).
717            # * Note: changing to AFT integrator may cause small difference to
718            #   the GDF integrator. If a very strict GDF result is desired,
719            #   we can disable this trick by setting
720            #   LONGRANGE_AFT_TURNOVER_THRESHOLD to 0.
721            # * The sparse mesh is not appropriate for low dimensional systems
722            #   with infinity vacuum since the ERI may require large mesh to
723            #   sample density in vacuum.
724            if (omega < LONGRANGE_AFT_TURNOVER_THRESHOLD and
725                cell.dimension >= 2 and cell.low_dim_ft_type != 'inf_vacuum'):
726                mydf = aft.AFTDF(cell, self.kpts)
727                mydf.ke_cutoff = aft.estimate_ke_cutoff_for_omega(cell, omega)
728                mydf.mesh = tools.cutoff_to_mesh(cell.lattice_vectors(), mydf.ke_cutoff)
729            else:
730                mydf = self
731            return _sub_df_jk_(mydf, dm, hermi, kpts, kpts_band,
732                               with_j, with_k, omega, exxdiv)
733
734        if kpts is None:
735            if numpy.all(self.kpts == 0):
736                # Gamma-point calculation by default
737                kpts = numpy.zeros(3)
738            else:
739                kpts = self.kpts
740        kpts = numpy.asarray(kpts)
741
742        if kpts.shape == (3,):
743            return df_jk.get_jk(self, dm, hermi, kpts, kpts_band, with_j,
744                                with_k, exxdiv)
745
746        vj = vk = None
747        if with_k:
748            vk = df_jk.get_k_kpts(self, dm, hermi, kpts, kpts_band, exxdiv)
749        if with_j:
750            vj = df_jk.get_j_kpts(self, dm, hermi, kpts, kpts_band)
751        return vj, vk
752
753    get_eri = get_ao_eri = df_ao2mo.get_eri
754    ao2mo = get_mo_eri = df_ao2mo.general
755    ao2mo_7d = df_ao2mo.ao2mo_7d
756
757    def update_mp(self):
758        mf = copy.copy(self)
759        mf.with_df = self
760        return mf
761
762    def update_cc(self):
763        pass
764
765    def update(self):
766        pass
767
768################################################################################
769# With this function to mimic the molecular DF.loop function, the pbc gamma
770# point DF object can be used in the molecular code
771    def loop(self, blksize=None):
772        cell = self.cell
773        if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
774            raise RuntimeError('ERIs of PBC-2D systems are not positive '
775                               'definite. Current API only supports postive '
776                               'definite ERIs.')
777
778        if blksize is None:
779            blksize = self.blockdim
780        for LpqR, LpqI, sign in self.sr_loop(compact=True, blksize=blksize):
781            # LpqI should be 0 for gamma point DF
782            # assert(numpy.linalg.norm(LpqI) < 1e-12)
783            yield LpqR
784
785    def get_naoaux(self):
786        '''The dimension of auxiliary basis at gamma point'''
787# determine naoaux with self._cderi, because DF object may be used as CD
788# object when self._cderi is provided.
789        if self._cderi is None:
790            self.build()
791        # self._cderi['j3c/k_id/seg_id']
792        with addons.load(self._cderi, 'j3c/0') as feri:
793            if isinstance(feri, h5py.Group):
794                naux = feri['0'].shape[0]
795            else:
796                naux = feri.shape[0]
797
798        cell = self.cell
799        if (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum' and
800            not isinstance(self._cderi, numpy.ndarray)):
801            with h5py.File(self._cderi, 'r') as feri:
802                if 'j3c-/0' in feri:
803                    dat = feri['j3c-/0']
804                    if isinstance(dat, h5py.Group):
805                        naux += dat['0'].shape[0]
806                    else:
807                        naux += dat.shape[0]
808        return naux
809
810DF = GDF
811
812
813def fuse_auxcell(mydf, auxcell):
814    chgcell = make_modchg_basis(auxcell, mydf.eta)
815    fused_cell = copy.copy(auxcell)
816    fused_cell._atm, fused_cell._bas, fused_cell._env = \
817            gto.conc_env(auxcell._atm, auxcell._bas, auxcell._env,
818                         chgcell._atm, chgcell._bas, chgcell._env)
819    fused_cell.rcut = max(auxcell.rcut, chgcell.rcut)
820
821    aux_loc = auxcell.ao_loc_nr()
822    naux = aux_loc[-1]
823    modchg_offset = -numpy.ones((chgcell.natm,8), dtype=int)
824    smooth_loc = chgcell.ao_loc_nr()
825    for i in range(chgcell.nbas):
826        ia = chgcell.bas_atom(i)
827        l  = chgcell.bas_angular(i)
828        modchg_offset[ia,l] = smooth_loc[i]
829
830    if auxcell.cart:
831        # Normalization coefficients are different in the same shell for cartesian
832        # basis. E.g. the d-type functions, the 5 d-type orbitals are normalized wrt
833        # the integral \int r^2 * r^2 e^{-a r^2} dr.  The s-type 3s orbital should be
834        # normalized wrt the integral \int r^0 * r^2 e^{-a r^2} dr. The different
835        # normalization was not built in the basis.  There two ways to surmount this
836        # problem.  First is to transform the cartesian basis and scale the 3s (for
837        # d functions), 4p (for f functions) ... then transform back. The second is to
838        # remove the 3s, 4p functions. The function below is the second solution
839        c2s_fn = gto.moleintor.libcgto.CINTc2s_ket_sph
840        aux_loc_sph = auxcell.ao_loc_nr(cart=False)
841        naux_sph = aux_loc_sph[-1]
842        def fuse(Lpq):
843            Lpq, chgLpq = Lpq[:naux], Lpq[naux:]
844            if Lpq.ndim == 1:
845                npq = 1
846                Lpq_sph = numpy.empty(naux_sph, dtype=Lpq.dtype)
847            else:
848                npq = Lpq.shape[1]
849                Lpq_sph = numpy.empty((naux_sph,npq), dtype=Lpq.dtype)
850            if Lpq.dtype == numpy.complex128:
851                npq *= 2  # c2s_fn supports double only, *2 to handle complex
852            for i in range(auxcell.nbas):
853                l  = auxcell.bas_angular(i)
854                ia = auxcell.bas_atom(i)
855                p0 = modchg_offset[ia,l]
856                if p0 >= 0:
857                    nd = (l+1) * (l+2) // 2
858                    c0, c1 = aux_loc[i], aux_loc[i+1]
859                    s0, s1 = aux_loc_sph[i], aux_loc_sph[i+1]
860                    for i0, i1 in lib.prange(c0, c1, nd):
861                        Lpq[i0:i1] -= chgLpq[p0:p0+nd]
862
863                    if l < 2:
864                        Lpq_sph[s0:s1] = Lpq[c0:c1]
865                    else:
866                        Lpq_cart = numpy.asarray(Lpq[c0:c1], order='C')
867                        c2s_fn(Lpq_sph[s0:s1].ctypes.data_as(ctypes.c_void_p),
868                               ctypes.c_int(npq * auxcell.bas_nctr(i)),
869                               Lpq_cart.ctypes.data_as(ctypes.c_void_p),
870                               ctypes.c_int(l))
871            return Lpq_sph
872    else:
873        def fuse(Lpq):
874            Lpq, chgLpq = Lpq[:naux], Lpq[naux:]
875            for i in range(auxcell.nbas):
876                l  = auxcell.bas_angular(i)
877                ia = auxcell.bas_atom(i)
878                p0 = modchg_offset[ia,l]
879                if p0 >= 0:
880                    nd = l * 2 + 1
881                    for i0, i1 in lib.prange(aux_loc[i], aux_loc[i+1], nd):
882                        Lpq[i0:i1] -= chgLpq[p0:p0+nd]
883            return Lpq
884    return fused_cell, fuse
885
886
887class _load3c(object):
888    def __init__(self, cderi, label, kpti_kptj, kptij_label=None,
889                 ignore_key_error=False):
890        self.cderi = cderi
891        self.label = label
892        if kptij_label is None:
893            self.kptij_label = label + '-kptij'
894        else:
895            self.kptij_label = kptij_label
896        self.kpti_kptj = kpti_kptj
897        self.feri = None
898        self.ignore_key_error = ignore_key_error
899
900    def __enter__(self):
901        self.feri = h5py.File(self.cderi, 'r')
902        if self.label not in self.feri:
903            # Return a size-0 array to skip the loop in sr_loop
904            if self.ignore_key_error:
905                return numpy.zeros(0)
906            else:
907                raise KeyError('Key "%s" not found' % self.label)
908
909        kpti_kptj = numpy.asarray(self.kpti_kptj)
910        kptij_lst = self.feri[self.kptij_label][()]
911        return _getitem(self.feri, self.label, kpti_kptj, kptij_lst,
912                        self.ignore_key_error)
913
914    def __exit__(self, type, value, traceback):
915        self.feri.close()
916
917def _getitem(h5group, label, kpti_kptj, kptij_lst, ignore_key_error=False):
918    k_id = member(kpti_kptj, kptij_lst)
919    if len(k_id) > 0:
920        key = label + '/' + str(k_id[0])
921        if key not in h5group:
922            if ignore_key_error:
923                return numpy.zeros(0)
924            else:
925                raise KeyError('Key "%s" not found' % key)
926
927        dat = h5group[key]
928        if isinstance(dat, h5py.Group):
929            # Check whether the integral tensor is stored with old data
930            # foramt (v1.5.1 or older). The old format puts the entire
931            # 3-index tensor in an HDF5 dataset. The new format divides
932            # the tensor into pieces and stores them in different groups.
933            # The code below combines the slices into a single tensor.
934            dat = numpy.hstack([dat[str(i)] for i in range(len(dat))])
935
936    else:
937        # swap ki,kj due to the hermiticity
938        kptji = kpti_kptj[[1,0]]
939        k_id = member(kptji, kptij_lst)
940        if len(k_id) == 0:
941            raise RuntimeError('%s for kpts %s is not initialized.\n'
942                               'You need to update the attribute .kpts then call '
943                               '.build() to initialize %s.'
944                               % (label, kpti_kptj, label))
945
946        key = label + '/' + str(k_id[0])
947        if key not in h5group:
948            if ignore_key_error:
949                return numpy.zeros(0)
950            else:
951                raise KeyError('Key "%s" not found' % key)
952
953#TODO: put the numpy.hstack() call in _load_and_unpack class to lazily load
954# the 3D tensor if it is too big.
955        dat = _load_and_unpack(h5group[key])
956    return dat
957
958class _load_and_unpack(object):
959    '''Load data lazily'''
960    def __init__(self, dat):
961        self.dat = dat
962    def __getitem__(self, s):
963        dat = self.dat
964        if isinstance(dat, h5py.Group):
965            v = numpy.hstack([dat[str(i)][s] for i in range(len(dat))])
966        else: # For mpi4pyscf, pyscf-1.5.1 or older
967            v = numpy.asarray(dat[s])
968
969        nao = int(numpy.sqrt(v.shape[-1]))
970        v1 = lib.transpose(v.reshape(-1,nao,nao), axes=(0,2,1)).conj()
971        return v1.reshape(v.shape)
972    def __array__(self):
973        '''Create a numpy array'''
974        return self[()]
975
976    @property
977    def shape(self):
978        dat = self.dat
979        if isinstance(dat, h5py.Group):
980            all_shape = [dat[str(i)].shape for i in range(len(dat))]
981            shape = all_shape[0][:-1] + (sum(x[-1] for x in all_shape),)
982            return shape
983        else: # For mpi4pyscf, pyscf-1.5.1 or older
984            return dat.shape
985
986
987def _gaussian_int(cell):
988    r'''Regular gaussian integral \int g(r) dr^3'''
989    return ft_ao.ft_ao(cell, numpy.zeros((1,3)))[0].real
990
991def _round_off_to_odd_mesh(mesh):
992    # Round off mesh to the nearest odd numbers.
993    # Odd number of grids is preferred because even number of grids may break
994    # the conjugation symmetry between the k-points k and -k.
995    # When building the DF integral tensor in function _make_j3c, the symmetry
996    # between k and -k is used (function conj_j2c) to overcome the error
997    # caused by auxiliary basis linear dependency. More detalis of this
998    # problem can be found in function _make_j3c.
999    return [(i//2)*2+1 for i in mesh]
1000
1001