1c===============================================================c
2c                                                               c
3c       NCC - a New Coupled-Cluster code for NWChem             c
4c                                                               c
5c       Developed by:                                           c
6c                                                               c
7c               Jeff R. Hammond                                 c
8c               Leadership Computing Facility                   c
9c               Argonne National Laboratory                     c
10c               jhammond@mcs.anl.gov                            c
11c                                                               c
12c               Karol Kowalski                                  c
13c               Environmental Molecular Sciences Laboratory     c
14c               Pacific Northwest National Laboratory           c
15c               karol.kowalski@pnl.gov                          c
16c                                                               c
17c               Marta Włoch                                     c
18c               Department of Chemistry                         c
19c               Michigan Technological University               c
20c               wloch@mtu.edu                                   c
21c                                                               c
22c===============================================================c
23c
24#ifdef VACUUM
25#define DEBUG_PRINT
26      program nwchem
27      implicit none
28      integer x2info(2)
29      integer nocc, nvir, tszocc, tszvir
30      character*(20) label
31c
32      nocc = 13
33      nvir = 57
34      tszocc = 7
35      tszvir = 17
36c
37      call ncc_doubles_create_aa(x2info,   ! output array
38     1                           label,    ! character name for this array
39     2                           nocc,     ! number of occupied orbitals
40     3                           nvir,     ! number of virtual orbitals
41     4                           tszocc,   ! tilesize of occupied orbitals
42     5                           tszvir)   ! tilesize of virtual orbitals
43c
44      end
45c
46c this removes the dependency on stdio.fh
47c
48#define LuOut 6
49c
50c this removes the dependency on errquit.fh
51c
52#define errquit errbomb
53#define GA_ERR 100
54#define MA_ERR 200
55c
56      subroutine errbomb(string, icode, errcode)
57      implicit none
58      character*(*) string
59      integer icode
60      integer errcode
61      call ga_error(string, icode)
62      end
63#endif
64
65#ifdef DEBUG_PRINT
66#define DP(x) print*,' x = ', x
67#else
68#define DP(x)
69#endif
70c
71      subroutine ncc_doubles_create_aa(x2info,   ! output array
72     1                                 label,    ! character name for this array
73     2                                 nocc,     ! number of occupied orbitals
74     3                                 nvir,     ! number of virtual orbitals
75     4                                 tszocc,   ! tilesize of occupied orbitals
76     5                                 tszvir)   ! tilesize of virtual orbitals
77c
78c $Id$
79c
80      implicit none
81#include "mafdecls.fh"
82#include "global.fh"
83#ifndef VACUUM
84#include "errquit.fh"
85#include "stdio.fh"
86#endif
87c
88c     interface variables
89c
90      integer x2info(*)  ! array containing doubles descriptor:
91c
92c                       x2info(1) = GA handle ~ g_x2
93c                       x2info(2) = GA size   ~ s_x2
94c                       x2info(3) =
95c
96c     other handles, such as for check-pointing, should be
97c     added to this array, so be careful to not hard-core
98c     the length too many places
99c
100      character*(*) label  ! label
101      integer nocc         ! number of occupied orbitals
102      integer nvir         ! number of virtual orbitals
103      integer tszocc       ! tilesize of occupied orbitals
104      integer tszvir       ! tilesize of virtual orbitals
105c
106c     internal variables
107c
108      integer g_x2         ! GA handle
109      integer s_x2         ! GA size
110c
111      integer nftocc       ! number of full occupied tiles (1D)
112      integer nftvir       ! number of full virtual tiles (1D)
113c
114      integer ntocc       ! total number of occupied tiles (1D)
115      integer ntvir       ! total number of virtual tiles (1D)
116c
117      integer etszocc      ! end tilesize of occupied orbitals
118      integer etszvir      ! end tilesize of virtual orbitals
119c
120      integer ifetocc      ! 1 if end tile exists, 0 otherwise
121      integer ifetvir      ! 1 if end tile exists, 0 otherwise
122c
123c     virtual 2D tile configuration info
124c
125      integer p1vv_num, p1vv_dim, p1vv_tot
126      integer p2vv_num, p2vv_dim, p2vv_tot
127      integer p3vv_num, p3vv_dim, p3vv_tot
128      integer p4vv_num, p4vv_dim, p4vv_tot
129c
130c     occupied 2D tile configuration info
131c
132      integer p1oo_num, p1oo_dim, p1oo_tot
133      integer p2oo_num, p2oo_dim, p2oo_tot
134      integer p3oo_num, p3oo_dim, p3oo_tot
135      integer p4oo_num, p4oo_dim, p4oo_tot
136c
137c     GA info
138c
139      integer dims(2)      ! GA dimensions
140      integer chunk(2)     ! GA chunking
141      integer ndim         ! using 2D GA storage
142      parameter (ndim = 2)
143      integer gatype       ! numerical type for GA (always double)
144      parameter (gatype = MT_DBL)
145      integer pgroup       ! GA processor group handle
146c
147c     irregular distribution variables
148c
149      integer dist_block(2)             ! blocks per dimension
150      integer k_blksz_list,l_blksz_list ! MA handles for map helper
151      integer k_dist_map,l_dist_map     ! MA handles for map
152      integer b, bsum
153c
154c     function declarations
155c
156      integer ncc_anti, ncc_symm
157      external ncc_anti, ncc_symm
158c
159#ifdef DEBUG_PRINT
160      print*,'top of ncc_doubles_create_aa'
161      DP(tszocc)
162      DP(tszvir)
163#endif
164c
165c     determine number of full tiles (dimension tszocc^2 * tszvir^2)
166c
167      nftocc = nocc / tszocc
168      nftvir = nvir / tszvir
169c
170c     determine size of end tilesize
171c
172      etszocc = mod(nocc,tszocc)
173      etszvir = mod(nvir,tszvir)
174c
175c     determine total tile number from full tiles + end tile (if exist)
176c
177      ntocc = nftocc
178      ntvir = nftvir
179      if (etszocc.gt.0) ntocc = ntocc+1
180      if (etszvir.gt.0) ntvir = ntvir+1
181      DP(nocc)
182      DP(nvir)
183      DP(tszocc)
184      DP(tszvir)
185      DP(etszocc)
186      DP(etszvir)
187      DP(nftocc)
188      DP(nftvir)
189      DP(ntocc)
190      DP(ntvir)
191c
192c     configure tiling in four parts - virtuals
193c
194c       B_last is the end tile index, if it exists, otherwise it is ntvir+1
195c
196c       part 1 - A < B < B_last (all cases)
197c       part 2 - A < B = B_last (if end tile exists)
198c       part 3 - A = B < B_last (all cases)
199c       part 4 - A = B = B_last (if end tile exists)
200c
201c     now we tabulate the number of 2D VV tiles,
202c     their individual 2D dimension, and their total 2D dimension
203c
204c     part 1 - A < B < B_last (all cases)
205c
206      p1vv_num = ncc_anti(nftvir)
207      p1vv_dim = tszvir*tszvir
208      p1vv_tot = p1vv_num * p1vv_dim
209      DP(p1vv_num)
210      DP(p1vv_dim)
211      DP(p1vv_tot)
212c
213c     part 2 - A < B = B_last (if end tile exists)
214c
215      if (etszocc.gt.0) then
216          p2vv_num = nftvir
217          p2vv_dim = tszvir*etszvir
218      else
219          p2vv_num = 0
220          p2vv_dim = 0
221      endif
222      p2vv_tot = p2vv_num * p2vv_dim
223      DP(p2vv_num)
224      DP(p2vv_dim)
225      DP(p2vv_tot)
226c
227c     part 3 - A = B < B_last (all cases)
228c
229      p3vv_num = nftvir
230      p3vv_dim = ncc_anti(tszvir)
231      p3vv_tot = p3vv_num * p3vv_dim
232      DP(p3vv_num)
233      DP(p3vv_dim)
234      DP(p3vv_tot)
235c
236c     part 4 - A = B = B_last (if end tile exists)
237c
238      if (etszocc.gt.0) then
239          p4vv_num = 1
240          p4vv_dim = ncc_anti(etszvir)
241      else
242          p4vv_num = 0
243          p4vv_dim = 0
244      endif
245      p4vv_tot = p4vv_num * p4vv_dim
246      DP(p4vv_num)
247      DP(p4vv_dim)
248      DP(p4vv_tot)
249c
250c     sanity check, since tiled total 2D dimension should
251c     equal the untiled 2D dimension
252c
253      if ( (p1vv_tot + p2vv_tot + p3vv_tot + p4vv_tot)
254     1     .ne. ( ncc_anti(nvir) ) ) then
255          if (ga_nodeid().eq.0) write(LuOut,100) ncc_anti(nvir),
256     1              (p1vv_tot + p2vv_tot + p3vv_tot + p4vv_tot)
257 100  format(1x,'virtual tiling wrong!!! ',/,
258     1       1x,'untiled VV dimension = ',i16,/,
259     2       1x,'  tiled VV dimension = ',i16)
260      endif
261      DP(ncc_anti(nvir))
262c
263c     configure tiling in four parts - occupied
264c
265c       J_last is the end tile index, if it exists, otherwise it is ntocc+1
266c
267c       part 1 - I < J < J_last (all cases)
268c       part 2 - I < J = J_last (if end tile exists)
269c       part 3 - I = J < J_last (all cases)
270c       part 4 - I = J = J_last (if end tile exists)
271c
272c     now we tabulate the number of 2D VV tiles,
273c     their individual 2D dimension, and their total 2D dimension
274c
275c     part 1 - I < J < J_last (all cases)
276c
277      p1oo_num = ncc_anti(nftocc)
278      p1oo_dim = tszocc*tszocc
279      p1oo_tot = p1oo_num * p1oo_dim
280      DP(p1oo_num)
281      DP(p1oo_dim)
282      DP(p1oo_tot)
283c
284c     part 2 - I < J = J_last (if end tile exists)
285c
286      if (etszocc.gt.0) then
287          p2oo_num = nftocc
288          p2oo_dim = tszocc*etszocc
289      else
290          p2oo_num = 0
291          p2oo_dim = 0
292      endif
293      p2oo_tot = p2oo_num * p2oo_dim
294      DP(p2oo_num)
295      DP(p2oo_dim)
296      DP(p2oo_tot)
297c
298c     part 3 - I = J < J_last (all cases)
299c
300      p3oo_num = nftocc
301      p3oo_dim = ncc_anti(tszocc)
302      p3oo_tot = p3oo_num * p3oo_dim
303      DP(p3oo_num)
304      DP(p3oo_dim)
305      DP(p3oo_tot)
306c
307c     part 4 - I = J = J_last (if end tile exists)
308c
309      if (etszocc.gt.0) then
310          p4oo_num = 1
311          p4oo_dim = ncc_anti(etszocc)
312      else
313          p4oo_num = 0
314          p4oo_dim = 0
315      endif
316      p4oo_tot = p4oo_num * p4oo_dim
317      DP(p4oo_num)
318      DP(p4oo_dim)
319      DP(p4oo_tot)
320c
321c     sanity check, since tiled total 2D dimension should
322c     equal the untiled 2D dimension
323c
324      if ( (p1oo_tot + p2oo_tot + p3oo_tot + p4oo_tot)
325     1     .ne. ( ncc_anti(nocc) ) ) then
326          if (ga_nodeid().eq.0) write(LuOut,200) ncc_anti(nocc),
327     1              (p1oo_tot + p2oo_tot + p3oo_tot + p4oo_tot)
328 200  format(1x,'occupied tiling wrong!!! ',/,
329     1       1x,'untiled OO dimension = ',i16,/,
330     2       1x,'  tiled OO dimension = ',i16)
331      endif
332      DP(ncc_anti(nocc))
333c
334c     end of tiling configuration
335c
336c     the GA will be 2D nocc*(nocc-1)/2 * nvir*(nvir-1)/2
337c
338      s_x2 = ncc_anti(nvir) * ncc_anti(nocc)
339      x2info(2) = s_x2
340      if (ga_nodeid().eq.0) write(LuOut,300) s_x2
341 300  format(1x,'creating GA of ',i16,' doubles')
342c
343c     GA handle creation and labeling
344c
345      g_x2 = ga_create_handle()
346      call ga_set_array_name(g_x2, label)
347c
348c     GA processor group configuration
349c        to begin, we'll use the world group
350c
351      pgroup = ga_pgroup_get_world()
352      call ga_set_pgroup(g_x2, pgroup)
353c
354c     GA dimensions
355c
356      dims(1) = ncc_anti(nvir) ! leading dimension is "fast" in Fortran GA API
357      dims(2) = ncc_anti(nocc)
358      call ga_set_data(g_x2, ndim, dims, gatype)
359c
360c     GA distribution
361c        all 4D tiles should be contiguous on a single host
362c        and distributed evenly across all nodes
363c
364c     dist_block(2) - number of blocks each dimension is divided into
365c
366      dist_block(1) = p1vv_num + p2vv_num + p3vv_num + p4vv_num ! VV first
367      dist_block(2) = p1oo_num + p2oo_num + p3oo_num + p4oo_num ! OO second
368      DP(dist_block(1))
369      DP(dist_block(2))
370c
371c     dist_map - starting index for each block; the size s is a sum of all elements of nblock array
372c
373      if (.not.ma_push_get(mt_int,dist_block(1)+dist_block(2),
374     1                     'blksz_list',l_blksz_list,k_blksz_list)) then
375          call errquit ('ncc_doubles_create_aa: ma_push_get blksz_list',
376     1                  dist_block(1)+dist_block(2),MA_ERR)
377      endif
378c
379      if (.not.ma_push_get(mt_int,dist_block(1)+dist_block(2),
380     1                     'dist_map',l_dist_map,k_dist_map)) then
381          call errquit ('ncc_doubles_create_aa: ma_push_get dist_map',
382     1                  dist_block(1)+dist_block(2),MA_ERR)
383      endif
384c
385      do b = 1, p1vv_num
386          int_mb(k_blksz_list+b-1) = p1vv_dim
387      enddo
388      do b = 1,p2vv_num
389          int_mb(k_blksz_list+p1vv_num+b-1) = p2vv_dim
390      enddo
391      do b = 1,p3vv_num
392          int_mb(k_blksz_list+p1vv_num+p2vv_num+b-1) = p3vv_dim
393      enddo
394      do b = 1,p4vv_num
395          int_mb(k_blksz_list+p1vv_num+p2vv_num+p3vv_num+b-1) = p4vv_dim
396      enddo
397c
398      bsum = 1
399      int_mb(k_dist_map) = bsum
400      do b = 2,p1vv_num+p2vv_num+p3vv_num+p4vv_num
401          bsum = bsum + int_mb(k_blksz_list+b-2)
402          int_mb(k_dist_map+b-1) = bsum
403      enddo
404c
405#ifdef DEBUG_PRINT
406      print*,'========================================'
407      do b = 1,dist_block(1)
408          print*,'blksz_list',b,int_mb(k_blksz_list+b-1)
409      enddo
410c
411      do b = 1,dist_block(1)
412          print*,'dist_map',b,int_mb(k_dist_map+b-1)
413      enddo
414      print*,'========================================'
415#endif
416c
417      do b = 1, p1oo_num
418          int_mb(k_blksz_list+dist_block(1)+b-1) = p1oo_dim
419      enddo
420      do b = 1,p2oo_num
421          int_mb(k_blksz_list+dist_block(1)+p1oo_num+b-1) = p2oo_dim
422      enddo
423      do b = 1,p3oo_num
424          int_mb(k_blksz_list+dist_block(1)+
425     1           p1oo_num+p2oo_num+b-1) = p3oo_dim
426      enddo
427      do b = 1,p4oo_num
428          int_mb(k_blksz_list+dist_block(1)+
429     1           p1oo_num+p2oo_num+p3oo_num+b-1) = p4oo_dim
430      enddo
431c
432      bsum = 1
433      int_mb(k_dist_map+dist_block(1)) = bsum
434      do b = 2,p1oo_num+p2oo_num+p3oo_num+p4oo_num
435          bsum = bsum + int_mb(k_blksz_list+dist_block(1)+b-2)
436          int_mb(k_dist_map+dist_block(1)+b-1) = bsum
437      enddo
438c
439#ifdef DEBUG_PRINT
440      print*,'========================================'
441      do b = dist_block(1)+1,dist_block(1)+dist_block(2)
442          print*,'blksz_list',b,int_mb(k_blksz_list+b-1)
443      enddo
444c
445      do b = dist_block(1)+1,dist_block(1)+dist_block(2)
446          print*,'dist_map',b,int_mb(k_dist_map+b-1)
447      enddo
448      print*,'========================================'
449#endif
450c
451c     send the irregular distribution to GA
452c
453!      call ga_set_irreg_distr(g_x2, int_mb(k_dist_map), dist_block)
454c
455c     don't need the dist_map anymore since it is inside of GA now
456c
457      if (.not.ma_pop_stack(l_dist_map)) then
458        call errquit('ncc_doubles_create_aa: ma_pop_stack dist_map ',
459     1               0,MA_ERR)
460      endif
461c
462      if (.not.ma_pop_stack(l_blksz_list)) then
463        call errquit('ncc_doubles_create_aa: ma_pop_stack blksz_list ',
464     1               0,MA_ERR)
465      endif
466c
467c     Regular distribution with chunking for now
468c
469      chunk(1) = -1
470      chunk(2) = -1
471      call ga_set_chunk(g_x2, chunk)
472c
473c     GA allocation
474c
475      if (.not. ga_allocate(g_x2) ) then
476          call errquit ('ncc_doubles_create_aa: ga_allocate',g_x2,
477     1                  GA_ERR)
478      endif
479c
480      x2info(1) = g_x2
481c
482      call ga_zero(g_x2)
483c
484#ifdef DEBUG_PRINT
485      call ga_print_distribution(g_x2)
486#endif
487c
488#ifdef DEBUG_PRINT
489      print*,'end of ncc_doubles_create_aa'
490#endif
491c
492      return
493      end
494
495
496
497
498      subroutine ncc_doubles_destroy(x2info)
499c
500      implicit none
501c#include "mafdecls.fh"
502#include "global.fh"
503#ifndef VACUUM
504#include "errquit.fh"
505#include "stdio.fh"
506#endif
507c
508c     interface variables
509c
510      integer x2info(*)  ! array containing doubles descriptor:
511c
512c                       x2info(1) = GA handle ~ g_x2
513c                       x2info(2) = GA size   ~ s_x2
514c
515c     other handles, such as for check-pointing, should be
516c     added to this array, so be careful to not hard-core
517c     the length too many places
518c
519c     internal variables
520c
521      integer g_x2 ! GA handle
522c
523#ifdef DEBUG_PRINT
524      print*,'top of ncc_doubles_destroy'
525#endif
526c
527      g_x2 = x2info(1)
528c
529      if (.not. ga_destroy(g_x2) ) then
530          call errquit ('ncc_doubles_destroy: ga_destroy',g_x2,GA_ERR)
531      endif
532c
533#ifdef DEBUG_PRINT
534      print*,'end of ncc_doubles_destroy'
535#endif
536c
537      return
538      end