1# $Id$
2
3
4coulumb interaction via RI
5
6(1) compute and store (disk/memory??) V inverse
7
8    V(i,j) = (i|j)
9    V(i,j)**(-1) store in global array
10
11ga_zero (v)
12a. open expansion basis set "riscf basis"
13b. ga_create_atom_block(basis,geom,handle)
14c. do ish = 1,nsh
15     do jsh = 1,nsh
16........................ parallelize here
17	call int_2e2c
18	ga_dadd / ga_update_shell_block
19     enddo
20   enddo
21
22create identity array (dimension nbfe*nbfe)
23call ga_lsolve(  V * V**(-1) = I)  => Ax=B  x is V**(-1)
24
25(2)  compute intermediate contraction of integrals and density for form Wu
26
27Wu = sum(u)  (u|kl)*D(kl)
28
29
30do ksh = 1,nsh
31  do lsh = 1, ksh
32	If schwarz okay
33	   ....................  parallelize here
34	   get_shell_block D(kl)
35	   do ush = 1,nshe
36              int_2e3c
37	       dgemv or loops to contract
38           enddo
39        endif
40  enddo
41enddo
42global sum Wu (replicated data)
43
44
45(3) contract Wu against V**(-1)  P[t]  = sum(u)  V**(-1)[t,u] * W[u]
46 use ga_dgemm
47
48(4) contract P[t] against 3center eris
49
50do ish = 1,nsh
51  do jsh = 1, ish
52	If schwarz okay
53	   ....................  parallelize here
54	   do tsh = 1,nshe
55              int_2e3c
56	       dgemv or loops to contract P[t] with (ij|t)
57           enddo
58	ga_put elements into distributed J[i,j]
59	ga_update_shell_block
60        endif
61  enddo
62enddo
63
64
65(5) fold J  ie J[i,j] = J[j,i] = (J[i,j] + J[j,i])   (1/2????)
66
67use ga_symmetrize and ga_dscal (factor of 2)
68
69
70
71Exchange interaction via RI
72
73
741)  SVS
75
76    K[i,j] = (ikt) S^-1[tu] (u|v) S^-1[vw] (wjl) D[k,l]
77
78           = (ikt) (t*|v*) (vjl) D[k,l]
79
802)  VS
81
82    K[i,j] = (ik|t) S^-1[tu] (ujl) D[k,l]  ... Feyereisen
83
843) V
85
86    K[i,j] = (ik|t) V^-1[t,u] (u|kl) D[k,l] ... Almloef
87
88
89
90
91Schwarz inequality
92
93
94     |I(fg)|^2 <= I(f^2)I(g^2)
95
96Hence
97
98     |(ikt)|^2 <= |(ikik)||(tt)|
99
100Have (tt) -> 1, and
101
102     (ikik) = simple expression but can approx. it
103              by using the (ik|ik) integrals already
104              computed.  Overall exponential decay is
105              the same.
106
107
108
109
110SVS algorithm
111-------------
112
113   a) Form and store (t*|w*) = S^-1[tu] (u|v) S^-1[vw]
114
115      i) Make (tu)
116
117      ii) Invert (tu) -> S^-1[tu]
118
119      iii) Make (u|v)
120
121      iv) Two Dgemms and symmetrize to eliminate round-off error.
122
123
124   b) Algorithm ignoring sparsity
125
126
127      This can employ the V or SVS methods equally well
128
129      DO j in blocks
130
131         Make Sj[v,l] = (vjl)               NNM integrals
132
133         MxM  Xj[v,k] = Sj[v,l]*D[l,k]      NNNM flops
134
135         MxM  Yj[t,k] = (t*|v*) Xj[v,k]     NNNM flops
136
137         DO i
138
139            Make Si[t,k] = (ikt)            NNM Nblock(j) integrals
140
141            Kij = Si[t,k]Yj[t,k]            NNNM flops
142
143         ENDDO
144
145
146   c) Algorithm using sparsity
147
148      DO j
149
150         DO l
151
152            Schwarz test on jl
153
154            Get D[*,l] and compress to significant list
155
156            DO v = 1, N
157
158               compute (vjl)                    aaN integrals (SVS)
159                                                aNN integrals (V)
160
161               if ((vjl) > ??)
162
163                  DO k = non-zero D's
164
165                     Xj[k,v] = (vjl) * D[k,l]   aaaN FLOPS (SVS)
166                                                aaNN FLOPS (V)
167
168        Have Xj[k,v]
169
170        DO k
171
172           DO v
173
174              if (Xj[k,v])
175
176                 DO t
177
178                    Yjk[t] = Yjk[t] + Xj[k,v] * (t* | v*)   aaNN FLOPS (SVS)
179                                                            aNNN FLOPS (V)
180
181          DO i
182
183             Schwarz on (ik)
184
185             DO t
186
187                if (Yjk[t]) to screen integrals
188
189                K[i,j] = K[i,j] + (ikt) * Yjk[t]     aaaN Integrals (SVS)
190                                                     aaNN Integrals (V)
191
192   Problem here is that the no. of integrals computed in the final loop
193   is the same (SVS) or exceeds (V) the no. of integrals computed in the
194   standard algorithm.  Also, amount of memory is large.
195
196   Introduce blocking of both J and K indices
197
198   d) Algorithm using sparsity and blocking
199
200      DO blocks of j
201         DO blocks of k
202
203            Zero Y
204
205            DO blocks of v   ... parallelize over combined B(v) and J shells
206               DO j (shells ?) in block ... NNB(v)/B(j) elements
207
208                  Zero X
209
210                  DO l
211                     Schwarz test on jl
212                     get D(k in block, l) and compress to non-zeroes
213                     DO v in block
214                        Prescreen on v for overlap
215                        compute (vjl)                 aaNB(k) integrals (SVS)
216                                                      aNNB(k) integrals (V)
217                        if ((vjl) > ??)
218                           DO k = non-zero D's
219                              Xj[v,k] = (vjl) * D[k,l]   aaaN FLOPS (SVS)
220                                                          aaNN FLOPS (V)
221                     ENDDO v
222                  ENDDO l
223
224                  Have Xj[vlo:vhi, klo:khi]  (local array)
225
226                  DO k in block
227                     DO v in block
228                        if (Xj[k,v])
229                           DO t
230                              Y[t,j,k] = Y[t,j,k] + (t*|v*) * Xj[v,k]
231                                                   aaNN FLOPS (SVS)
232                                                   aNNN FLOPS (V)
233                           ENDDO t
234                     ENDDO v
235                  ENDDO k
236               ENDDO j
237            ENDDO blocks of v
238
239            Have Y[1:t, klo:khi, jlo:jhi] (global array)
240
241            DO i ... parallelize over combined I and K indices ... aN elements
242               DO k in block
243                  Schwarz on (ik)
244                  DO t
245                     screen on t and max (over j) Y[t,k,j]
246                                                  aaNB(j) Integrals (SVS)
247                                                  aNNB(j) Integrals (V)
248                     if (ikt)
249                        DO j in block
250                           K[i,j] = K[i,j] + (ikt) * Y[t,j,k]
251						  aaNN FLOPS (SVS)
252                                                  aNNN FLOPS (V)
253                        ENDDO j
254                  ENDDO t
255               ENDDO k
256            ENDDO i
257         ENDDO k blocks
258      ENDDO j blocks
259
260      Integral evaluation cost
261
262           SVS   aaN(B(k)+B(j))
263           V     aNN(B(k)+B(j))
264
265      Memory
266
267           M = NNN / (B(k)*B(j))
268
269      Choose B(v) as large as possible ... one shell at a time for v
270
271          B(k)*B(j) = NNN/M
272
273      Symmetry implies B(k) = B(j) = sqrt(NNN/M)
274
275      Hence integ cost is actually
276
277           SVS   aaN * (2Nsqrt(N/M))
278           V     aNN * (2Nsqrt(N/M))
279
280      If can hold all integrals (i.e., M = NNN)
281
282           B = 1
283
284           SVS   aaN * 2  (In this case precompute once and for all)
285           V     aNN * 2
286
287      If can hold 1 square NN matrix (i.e., M = NN)
288
289           B = sqrt(N)
290
291           SVS   aaN * 2sqrt(N)
292           V     aNN * 2sqrt(N)
293
294      If can hold 1 vector length N (i.e., M = N)
295
296           B = N ... i.e., 1 j and k at a time
297
298           SVS   aaN * (2N)
299           V     aNN * (2N)
300
301