1
2Most of these routines are in the utility directory, but are
3documented here for better organization.  These routines all operate
4with global arrays and are (spuriously?)  prefaced with \verb+ga_+ but
5are not part of the GA library.  Some reference other NWChem objects,
6such as geometries or basis sets.
7
8All of these routines are collective --- i.e., all processes must
9invoke them at the same time, otherwise deadlock or a fatal error will
10result.
11
12\section{Simple linear operations}
13
14\subsection{{\tt ga\_get\_diag}}
15\begin{verbatim}
16  subroutine ga_get_diagonal(g_a, diags)
17  integer g_a               ! [input] GA handle
18  double precision diags(*) ! [output] diagonals
19\end{verbatim}
20Extracts the diagonal elements of the square (real) global array in a
21'scalable' fashion, broadcasting the result to everyone.  The local
22array (\verb+diags+) must be large enough to hold the result.  The
23only communication is (apart from synchronization to avoid a race
24condition) is a global sum of length the diagonal.
25
26\subsection{{\tt ga\_maxelt}}
27\begin{verbatim}
28   subroutine ga_maxelt(g_a, value)
29   integer g_a            ! [input] GA handle
30   double precision value ! [output] abs max value
31\end{verbatim}
32Returns the absolute value of the element with largest absolute
33magnitude.  The only communication is (apart from synchronization to
34avoid a race condition) is a global maximum of unit length.
35
36\subsection{{\tt ga\_ran\_fill}}
37\begin{verbatim}
38  subroutine ga_ran_fill(g_a, ilo, ihi, jlo, jhi)
39  integer g_a                ! [input] GA handle
40  integer ilo, ihi, jlo, jhi ! [input] patch specification
41\end{verbatim}
42Fills a patch of a global array (\verb+a(ilo:ihi,jlo:jhi)+) with
43random numbers uniformly distributed between 0 and 1.  The only
44communication is necessary synchronization.
45
46\subsection{{\tt ga\_screen}}
47\begin{verbatim}
48   subroutine ga_screen(g_a, value)
49   integer g_a            ! [input] GA handle
50   double precision value ! [input] Threshold
51\end{verbatim}
52Set all elements whose absolute value is less than {\tt value} to a
53hard zero.  The only communication is necessary synchronization.
54
55\subsection{{\tt ga\_mat2col} and {\tt ga\_col2mat}}
56\begin{verbatim}
57  subroutine ga_mat2col( g_a, ailo, aihi, ajlo, ajhi,
58 &   g_b, bilo, bihi, bjlo, bjhi)
59  integer g_a
60  integer g_b
61  integer ailo, aihi, ajlo, ajhi
62  integer bilo, bihi, bjlo, bjhi
63
64  subroutine ga_col2mat( g_a, ailo, aihi, ajlo, ajhi,
65 &   g_b, bilo, bihi, bjlo, bjhi)
66  integer g_a
67  integer g_b
68  integer ailo, aihi, ajlo, ajhi
69  integer bilo, bihi, bjlo, bjhi
70\end{verbatim}
71Obsolete routines to copy patches with reshaping. Use \verb+ga_copy_patch+
72instead.
73
74\section{Linear algebra and transformations}
75
76\subsection{{\tt ga\_mix}}
77\begin{verbatim}
78  subroutine ga_mix(g_a, n, nvec, b, ld)
79  integer g_a                 [input]
80  integer n, nvec, ld         [input]
81  double precision b(ld,nvec) [input]
82\end{verbatim}
83This routine is set up to optimize the rotation of a (small) set of
84vectors amoung themselves.  The matrix ($A(n,n_{vec})$) referenced by
85GA handle \verb+g_a+ must be distributed by columns so that an entire
86row is present on a processor --- a fatal error results if this is not
87the case.  The matrix {\tt b} must be replicated.  With these
88conditions no communication is necessary, other than that required for
89synchronizations to avoid race conditions.  The routine performs the
90following operation
91\begin{displaymath}
92     A_{ij} \Leftarrow \sum_{l=1,n_{vec}} A_{il} B_{lj}, i=1,n; j=1,n_{vec}
93\end{displaymath}
94which can be regarded as a multiplication of two matrices, one global
95and the other local, with the result overwriting the input global
96matrix.
97
98It would be easy to make this routine use more general distributions
99but still leave the optimized code for columnwise distribution.
100
101\subsection{{\tt two\_index\_transf}}
102\begin{verbatim}
103  subroutine two_index_transf( g_a, g_lhs, g_rhs, g_tmp, g_b )
104  integer g_a           ! [input] Handle to initial GA
105  integer g_lhs, g_rhs  ! [input] Handles to transformation
106  integer g_tmp         ! [input] Handle to scratch GA
107  integer g_b           ! [input] Handle to output GA
108\end{verbatim}
109Two-index square matrix transform --- $B = U_{LHS}^T A U_{RHS}$.  Done
110using calls to \verb+ga_dgemm+.  The scratch array must be a square
111array of the same dimension as all the other arrays.  It would be easy
112(and very useful) to generalize this to handle non-square transformations.
113
114\subsection{{\tt ga\_matpow}}
115\begin{verbatim}
116  subroutine ga_matpow(g_v, pow, mineval)
117  integer g_v              ! [input/output] Handle to GA
118  double precision pow     ! [input] Exponent
119  double precision mineval ! [input] Threshold for evals
120\end{verbatim}
121The square matrix referenced by \verb+g_v+ is raised to the power
122\verb+pow+ by diagonalizing it, discarding (if \verb+pow+ is les than
123zero) eigenvectors whose eigenvalue is smaller than \verb+mineval+,
124raising the diagonal matrix to the required power, and transforming
125back.  The only allowed values for \verb+pow+ are 1, -1,
126$\frac{1}{2}$, and $\frac{-1}{2}$, though it would be easy to
127generalize the routine to handle any value.
128
129The input GA is overwritten with the exponentiated result.  It is {\em
130not} guaranteed that the same handle will be returned -- if it is most
131efficient, the original GA may be destroyed and a new GA created to
132hold the result.
133
134Uses a GA the size of V, and a local array the size of the number
135of rows of V.  The eigensolver requires additional memory.
136
137Due to the use of a generalized eigensolver, an additional GA the size
138of V is also used.
139
140\subsection{{\tt mk\_fit\_xf}}
141\begin{verbatim}
142  logical function mk_fit_xf(approx, split, basis, mineval, g_v)
143  character*(*) approx, split [input]
144  integer basis               [input]
145  integer g_v                 [output]
146  double precision mineval    [input]
147\end{verbatim}
148Returns in \verb+g_v+ a newly allocated global array containing the
149appropriate fitting matrix for the specified
150resolution-of-the-identity (RI) approximation.
151
152 Arguments:
153\begin{itemize}
154\item \verb+approx+ --- RI approximation used (SVS, S, or V)
155\item \verb+split+ ---  Whether or not to return the square root of the matrix
156              so that it can be used to transform both sets of 3c ints.
157              (Y or N).
158\item \verb+basis+ --- Handle to fitting basis
159\item \verb+mineval+ --- Minimum eigenvalue of V matrix to be retained in
160              the inversion
161\item \verb+g_v+ ---  Returns new global array handle to the $V^{-1/2}$ matrix
162\end{itemize}
163
164Return value:
165\begin{itemize}
166\item \TRUE\ if successful, even if some eigenvalues fell below \verb+mineval+.
167\item \FALSE\ if errors occured in dynamic memory (MA or GA) operations,
168inquiries about the basis, or in obtaining the required integrals.
169
170Note: the integral package must be initialized before calling this routine.
171
172Memory use:
173\item Creates and returns a global array (\verb+g_v+) the size of
174\verb+bas_numbf(basis)+$^2$.
175\item Additional temporary usage consists of the largest of:
176\begin{enumerate}
177\item Integral requirements, reported by \verb+int_mem_2e2c+.
178\item \verb+bas_numbf(basis)+$^2$ + \verb+bas_numbf(basis)+ and whatever additional
179        space is required by \verb+ga_diag_std+.
180\item 2 * \verb+bas_numbf(basis)+$^2$.
181\end{enumerate}
182\end{itemize}
183
184\subsection{{ga\_orthog}}
185\begin{verbatim}
186  subroutine ga_orthog(g_vecs, g_over, ometric)
187  integer g_vecs  ! [input] Vectors to be orthonormalized
188  integer g_over  ! [input] Optional metric/overlap matrix
189  logical ometric ! [input] If .true. use metric matrix
190\end{verbatim}
191The columns of the GA referenced by the handle \verb+g_vecs+ are
192assumed to be vectors that must be orthonormalized.  If \verb+ometric+
193is specified as \FALSE\ then the standard inner product is used.
194Otherwise the \verb+g_over+ is assumed to refer to the metric (or
195overlap).  Internally, MA is used to allocate a copy of the matrix
196(and the metric) in a specific distribution.  If insufficient memory
197is available or the matrix is singular a fatal error results.
198
199\subsection{{\tt ga\_orthog\_vec}}
200\begin{verbatim}
201  subroutine ga_orthog_vec(n, nvec, g_m, g_x, j)
202  integer n    ! vector length
203  integer nvec ! no. of vectors
204  integer g_m  ! GA handle for matrix
205  integer g_x  ! GA handle for vector
206  integer j    ! Column for vector
207\end{verbatim}
208Orthogonalize the vector \verb+x(1:n,j)+ to the vectors
209\verb+g(1:n,1:nvec)+.  Note that x is {\em not} normalized.  This routine
210is/was used by some of the iterative equation solvers.
211
212\section{Iterative linear algebra operations}
213
214\subsection{{\tt ga\_iter\_diag}}
215\begin{verbatim}
216  logical function ga_iter_diag(n, nroot, maxiter, maxsub, tol,
217 &     precond, product, oprint, eval0, g_evec, eval, rnorm, iter)
218  integer n                 ! Matrix dimension
219  integer nroot             ! No. of eigen vectors sought
220  integer maxiter           ! Max. no. of iterations
221  integer maxsub            ! Max. dimension of iterative subspace
222  double precision tol      ! Required norm of residual
223  external precond          ! Preconditioner
224  external product          ! Matrix-vector product
225  logical oprint            ! Control printing to unit 6
226  double precision eval0    ! Estimate of lowest eval
227  integer g_evec            ! n by nroot GA for guess and final
228  double precision eval(nroot) ! Returns eigen values
229  double precision rnorm(nroot) ! Returns residual norms
230  integer iter              ! Returns no. of iterations used
231\end{verbatim}
232  Solve the eigenvalue equation ${\bf A}x = \lambda x$ with the
233vectors $x$ in GA and a routine (product) to form a matrix vector
234products to a required precision.  Return \TRUE\ if converged, \FALSE
235otherwise. Rnorm returns the actual attained precision for each root.
236
237The block-Davidson-like algorithm solves for the best solution for
238each eigenvector in the iterative subspace ($x_i, i = 1, k$) with
239\begin{displaymath}
240  {\bf A} y = {\bf S}y\lambda, \mbox{where} A_{ij} = x_i^{\dagger} A x_j,
241\mbox{and} S_{ij} = x_i^{\dagger} x_j
242\end{displaymath}
243 {\em NB:} The matrix vector products ${\bf A}x_i$ are performed by the user
244provided routine \verb+product()+ to a precision specified by this routine
245(currently products are performed one at a time, but it is easy to
246improve the routine to perform many in one call).
247
248The best solution within the iterative subspace is then
249\begin{displaymath}
250  x = \sum_i y_i x_i
251\end{displaymath}
252New expansion vectors are added by multiplying the residual
253\begin{displaymath}
254  r = ({\bf A} - s {\bf I}) x, \mbox{where} s \mbox{is the shift},
255\end{displaymath}
256with some approximation ($P$) to the inverse of ${\bf A}-s{\bf I}$.
257This preconditioning is performed by the user provided routine
258\verb+precond()+.  If \verb+eval0+ is a hard zero then the shift ($s$) is
259chosen as the current estimate for the eigenvalue that the next update
260strives to improve.  Otherwise the shift is fixed as \verb+eval0+
261which is appropriate for convergence to a known energy spectrum from
262some poor initial guess.
263
264  The program cyles through the lowest nroot roots updating each that
265does not yet satisfy the convergence criterion which is
266\begin{displaymath}
267   {\tt rnorm(root)} = ||r|| < {\tt tol}
268\end{displaymath}
269
270 On input the global array \verb+x(n,nroot)+ should contain either an
271initial guess at the eigen vectors or zeroes.  If any vector is zero
272then random numbers are used.
273
274The use must proivde these routines:
275\begin{itemize}
276\item \verb+subroutine product(precision, g_x, g_ax)+ ---
277     computes the product ${\bf A}x$ to the specified precision (absolute
278     magnitude error in any element of the product) returning the result
279     in the GA \verb+g_ax+.
280\item \verb+subroutine precond(g_r, shift)+ ---
281     Apply an approximation ($P$) to the inverse of ${\bf A} - s {\bf
282     I}$ to the vector in \verb+g_r+ overwriting \verb+g_r+ with the result.
283\end{itemize}
284
285If the initial guess is zero no redundant matrix product is formed.
286
287Temporary global arrays of dimension \verb+n*maxsub+, \verb+n*maxsub+,
288\verb+n+ and \verb+n+ are created.
289
290
291\subsection{{\tt ga\_iter\_lsolve}}
292
293\subsection{{\tt ga\_iter\_orthog}}
294
295\subsection{{\tt ga\_iter\_project}}
296
297\section{Miscellaneous}
298
299\subsection{{\tt ga\_pcg\_minimize}}
300
301\subsection{{\tt int\_1e\_ga}}
302
303\subsection{{\tt int\_2c\_ga}}
304
305
306