1C:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
2C NAME
3C     xf3ci_mem12 -- Compute best local memory usage for transformation
4c     steps 1 and 2
5C
6C REVISION
7C     $Id$
8C
9C SYNOPSIS
10      Subroutine XF3cI_Mem12( MAvail, PMin, QMin, SMin, PMax, QMax,
11     $   SMax, NPXF, NQXF, PDim, QDim, SDim, MUsed, LCP, LCQ,
12     $   LInterm, LOut, LIBuf)
13      Implicit NONE
14      Integer MAvail, PMin, QMin, SMin, PMax, QMax,
15     $   SMax, NPXF, NQXF, PDim, QDim, SDim, MUsed, LCP, LCQ,
16     $   LInterm, LOut, LIBuf
17C
18C ARGUMENTS
19C     MAvail   Local memory available for use in transformation
20C              (in double precision words) [IN]
21C     PMin     Minimum size allowed for P blocking [IN]
22C     QMin     Minimum size allowed for Q blocking [IN]
23C     SMin     Minimum size allowed for S blocking [IN]
24C     PMax     Maximum size allowed for P blocking [IN]
25C     QMax     Maximum size allowed for Q blocking [IN]
26C     SMax     Maximum size allowed for S blocking [IN]
27C     NPXF     Number of functions P index is transformed into [IN]
28C     NQXF     Number of functions Q index is transformed into [IN]
29C     PDim     Actual size allowed for P blocking [OUT]
30C     QDim     Actual size allowed for Q blocking [OUT]
31C     SDim     Actual size allowed for S blocking [OUT]
32C     MUsed    Memory required by local arrays [OUT]
33C     LCP      Length of CP array (double precision words) [OUT]
34C     LCQ      Length of CQ array (double precision words) [OUT]
35C     LInterm  Length of Interm array (double precision words) [OUT]
36C     LOut     Length of Out array (double precision words) [OUT]
37C     LIBuf    Length of IBuf array (double precision words) [OUT]
38C
39C DESCRIPTION
40C     Attempts to compute best use of local memory to improve
41C     performance of first two transformation steps.
42C
43C     The minimum limits for P, Q, and S should be at least the
44C     size of the largest shell block in the appropriate basis to
45C     insure things will work.  The maximums would generally be
46C     the size of the basis (P, Q) or the size of the local piece
47C     of the basis (S) but may be lower to controll memory usage.
48C
49C     If there is insufficient memory to even meet the minimums,
50C     PDim, QDim and SDim are returned 0 and MUsed holds the ammount
51C     of memory that would be required for the minimum size blocks.
52C
53C     All array lengths are computed here to simplify keeping
54C     memory allocation consistent with the computational code.
55C     This way all our parent routine has to do is allocate whatever
56C     size we tell it.
57C
58C     Uses a simplistic algorithm right now which simply maximizes
59C     Q, then P, then S.  S is last because it always forms a combined
60C     index with one of the others, so it is usually large enough.
61C     But more emphasis should be placed on the size of P since
62C     about 1/3 of the work has PDim as the contraction length.
63C:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
64C LOCAL VARIABLES
65      Integer MStep
66C
67C STATEMENT FUNCTIONS
68      Integer P, Q, S
69      Integer MCP, MCQ, MInterm, MOut, MIBuf, MemReq
70      MCP(P,Q,S)     = P * NPXF
71      MCQ(P,Q,S)     = Q * NQXF
72      MInterm(P,Q,S) = NQXF * P * S
73      MOut(P,Q,S)    = NQXF * P * S
74      MIBuf(P,Q,S)   = P * Q * S
75C
76      MemReq(P,Q,S) = MCP(P,Q,S) + MCQ(P,Q,S) + MInterm(P,Q,S)
77     $   + MOut(P,Q,S) + MIBuf(P,Q,S)
78C
79C     Start with the minimums
80C
81      PDim = PMin
82      QDim = QMin
83      SDim = SMin
84C
85C     First, make sure we can do the minimum.  If not, the signal is to
86c     set all dimensions to zero.
87C
88      MUsed = MemReq(PDim, QDim, SDim)
89      If ( MUsed .gt. MAvail) then
90         PDim = 0
91         QDim = 0
92         SDim = 0
93         Goto 8000
94      EndIf
95C
96C     The following procedure depends on the MemReq function being
97c     linear in each of the arguments.
98C
99C     Make Q as large as we can given the available memory since that is
100c     most important to performance.  To do this, take the "derivative"
101c     of the memory requirement function w.r.t. Q and use that memory
102c     step size to fill in the rest of the memory.
103C
104      MStep = MemReq(PDim, 1, SDim)
105      QDim = Min( QDim + (MAvail - MUsed) / MStep, QMax)
106C
107C     Now maximize P in the same way.
108C
109      MUsed = MemReq(PDim, QDim, SDim)
110      MStep =  MemReq( 1, QDim, SDim)
111      PDim = Min( PDim + (MAvail - MUsed) / MStep, PMax)
112C
113C     And finally, S...
114C
115      MUsed = MemReq(PDim, QDim, SDim)
116      MStep =  MemReq( PDim, QDim, 1)
117      SDim = Min( SDim + (MAvail - MUsed) / MStep, SMax)
118C
119C     We've got something workable.  Set the sizes and return
120C
121 8000 Continue
122      MUsed   = MemReq(PDim, QDim, SDim)
123      LCP     = MCP(PDim, QDim, SDim)
124      LCQ     = MCQ(PDim, QDim, SDim)
125      LInterm = MInterm(PDim, QDim, SDim)
126      LOut    = MOut(PDim, QDim, SDim)
127      LIBuf   = MIBuf(PDim, QDim, SDim)
128C
129      Return
130      End
131