1#define NBLOCKS 3
2
3*
4* $Id$
5*
6
7*     ***********************************
8*     *					*
9*     *	   D3dB_c_transpose_jk		*
10*     *					*
11*     ***********************************
12
13      subroutine D3dB_c_transpose_jk(nb,A,tmp1,tmp2)
14
15*****************************************************
16*                                                   *
17*      This routine performs the operation          *
18*               A(i,k,j) <- A(i,j,k)                *
19*                                                   *
20*      np = the number of worker nodes              *
21*      proc#=0...(np-1)
22*                                                   *
23*       this transpose uses more buffer space       *
24*       then transpose2                             *
25*****************************************************
26      implicit none
27      integer     nb
28      complex*16  A(*)
29      complex*16  tmp1(*),tmp2(*)
30
31#include "bafdecls.fh"
32
33#include "D3dB.fh"
34
35
36*     **** indexing variables ****
37c     integer iq_to_i1((NFFT1/2+1)*NFFT2*NSLABS)
38c     integer iq_to_i2((NFFT1/2+1)*NFFT2*NSLABS)
39c     integer i1_start(NFFT3+1)
40c     integer i2_start(NFFT3+1)
41      integer iq_to_i1(2,NBLOCKS)
42      integer iq_to_i2(2,NBLOCKS)
43      integer i1_start(2,NBLOCKS)
44      integer i2_start(2,NBLOCKS)
45      common / trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
46
47*     **** Used to avoid asynchronous communications ****
48      integer Nchannels(NBLOCKS)
49      integer channel_proc(2,NBLOCKS)
50      integer channel_type(2,NBLOCKS)
51      common / channel_blk / channel_proc,channel_type,Nchannels
52
53#include "tcgmsg.fh"
54#include "msgtypesf.h"
55      integer  rcv_len,rcv_proc
56
57*     **** local variables ***
58      integer i,c
59      integer it
60      integer source
61      integer msglen
62      integer pfrom,pto
63      integer taskid,np
64
65*     **** external functions ****
66      integer  Parallel2d_convert_taskid_i
67      external Parallel2d_convert_taskid_i
68
69      call Parallel2d_taskid_i(taskid)
70      call Parallel2d_np_i(np)
71
72*     **** pack A(i) array ****
73#ifndef CRAY
74!DIR$ ivdep
75#endif
76      do i=1,nfft3d(nb)  !(nx(nb)/2+1)*ny(nb)*nq(nb)
77         tmp1(int_mb(iq_to_i1(1,nb)+i-1)) = A(i)
78      end do
79
80*     **** it = 0, transpose data on same thread ****
81      msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1)
82#ifndef CRAY
83!DIR$ ivdep
84#endif
85      do i=1,msglen
86         tmp2(int_mb(i2_start(1,nb)+1-1)+i-1)
87     > = tmp1(int_mb(i1_start(1,nb)+1-1)+i-1)
88      end do
89
90
91      do c=1,Nchannels(nb)
92*        **** receive packed array data ****
93         if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then
94            pfrom=int_mb(channel_proc(1,nb)+c-1)
95            it = mod((taskid+np-pfrom),np)
96
97            source=pfrom
98            msglen = (int_mb(i2_start(1,nb)+it+2-1)
99     >             -  int_mb(i2_start(1,nb)+it+1-1))
100
101            if (msglen.gt.0) then
102               call RCV(9+MSGDBL,
103     >                  tmp2(int_mb(i2_start(1,nb)+it+1-1)),
104     >                  mdtob(2*msglen),rcv_len,
105     >                  Parallel2d_convert_taskid_i(source),
106     >                  rcv_proc,1)
107            end if
108         end if
109
110*        **** send packed array to other processors ****
111         if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then
112            pto=int_mb(channel_proc(1,nb)+c-1)
113            it = mod((pto-taskid+np),np)
114
115            msglen    = (int_mb(i1_start(1,nb)+it+2-1)
116     >                - int_mb(i1_start(1,nb)+it+1-1))
117
118            if (msglen.gt.0) then
119               call SND(9+MSGDBL,
120     >                  tmp1(int_mb(i1_start(1,nb)+it+1-1)),
121     >                  mdtob(2*msglen),
122     >                  Parallel2d_convert_taskid_i(pto),
123     >                  1)
124            end if
125         end if
126
127      end do
128
129
130*     **** unpack A(i) array ****
131#ifndef CRAY
132!DIR$ ivdep
133#endif
134      do i=1,nfft3d(nb)  !(nx(nb)/2+1)*ny(nb)*nq(nb)
135         A(i) = tmp2(int_mb(iq_to_i2(1,nb)+i-1))
136      end do
137
138
139      return
140      end
141
142c*     ***********************************
143c*     *					*
144c*     *	   D3dB_nc_transpose_jk		*
145c*     *					*
146c*     ***********************************
147c
148c      subroutine D3dB_nc_transpose_jk(nb,ne,A,tmp1,tmp2)
149c
150c*****************************************************
151c*                                                   *
152c*      This routine performs the operation          *
153c*               A(i,k,j,n) <- A(i,j,k,n)                *
154c*                                                   *
155c*      np = the number of worker nodes              *
156c*      proc#=0...(np-1)
157c*                                                   *
158c*       this transpose uses more buffer space       *
159c*       then transpose2                             *
160c*****************************************************
161c      implicit none
162c      integer     nb,ne
163c      complex*16  A(*)
164c      complex*16  tmp1(*),tmp2(*)
165c
166c#include "bafdecls.fh"
167c
168c#include "D3dB.fh"
169c
170c
171c*     **** indexing variables ****
172cc     integer iq_to_i1((NFFT1/2+1)*NFFT2*NSLABS)
173cc     integer iq_to_i2((NFFT1/2+1)*NFFT2*NSLABS)
174cc     integer i1_start(NFFT3+1)
175cc     integer i2_start(NFFT3+1)
176c      integer iq_to_i1(2,NBLOCKS)
177c      integer iq_to_i2(2,NBLOCKS)
178c      integer i1_start(2,NBLOCKS)
179c      integer i2_start(2,NBLOCKS)
180c      common / trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
181c
182c*     **** Used to avoid asynchronous communications ****
183c      integer Nchannels(NBLOCKS)
184c      integer channel_proc(2,NBLOCKS)
185c      integer channel_type(2,NBLOCKS)
186c      common / channel_blk / channel_proc,channel_type,Nchannels
187c
188c#include "tcgmsg.fh"
189c#include "msgtypesf.h"
190c      integer  rcv_len,rcv_proc
191c
192c*     **** local variables ***
193c      integer i,c,n,nnfft3d
194c      integer it
195c      integer source
196c      integer msglen
197c      integer pfrom,pto
198c      integer taskid,np
199c
200c      call Parallel_taskid(taskid)
201c      call Parallel_np(np)
202c
203c      !nnfft3d = (nx(nb)/2+1)*ny(nb)*nq(nb)
204c
205c*     **** pack A(i) array ****
206c      do i=1,nfft3d(nb)
207c#ifndef CRAY
208c!DIR$ ivdep
209c#endif
210c      do n=1,ne
211c         tmp1(n+(int_mb(iq_to_i1(1,nb)+i-1)-1)*ne)
212c     >   = A(i+(n-1)*nfft3d(nb))
213c      end do
214c      end do
215c
216c*     **** it = 0, transpose data on same thread ****
217c      msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1)
218c      do i=1,msglen
219c#ifndef CRAY
220c!DIR$ ivdep
221c#endif
222c      do n=1,ne
223c         tmp2(n+(int_mb(i2_start(1,nb))+i-2)*ne)
224c     > = tmp1(n+(int_mb(i1_start(1,nb))+i-2)*ne)
225c      end do
226c      end do
227c
228c
229c      do c=1,Nchannels(nb)
230c*        **** receive packed array data ****
231c         if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then
232c            pfrom=int_mb(channel_proc(1,nb)+c-1)
233c            it = mod((taskid+np-pfrom),np)
234c
235c            source=pfrom
236c            msglen = (int_mb(i2_start(1,nb)+it+2-1)
237c     >             -  int_mb(i2_start(1,nb)+it+1-1))*ne
238c
239c            if (msglen.gt.0) then
240c               call RCV(9+MSGDBL,
241c     >                  tmp2(1+(int_mb(i2_start(1,nb)+it+1-1)-1)*ne),
242c     >                  mdtob(2*msglen),rcv_len,
243c     >                  source,rcv_proc,1)
244c            end if
245c         end if
246c
247c*        **** send packed array to other processors ****
248c         if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then
249c            pto=int_mb(channel_proc(1,nb)+c-1)
250c            it = mod((pto-taskid+np),np)
251c
252c            msglen    = (int_mb(i1_start(1,nb)+it+2-1)
253c     >                - int_mb(i1_start(1,nb)+it+1-1))*ne
254c
255c            if (msglen.gt.0) then
256c               call SND(9+MSGDBL,
257c     >                  tmp1(1+(int_mb(i1_start(1,nb)+it+1-1)-1)*ne),
258c     >                  mdtob(2*msglen),pto,1)
259c            end if
260c         end if
261c
262c      end do
263c
264c
265c*     **** unpack A(i) array ****
266c      do i=1,nfft3d(nb)
267c#ifndef CRAY
268c!DIR$ ivdep
269c#endif
270c      do n=1,ne
271c         A(i+(n-1)*nfft3d(nb))
272c     >   = tmp2(n+(int_mb(iq_to_i2(1,nb)+i-1)-1)*ne)
273c      end do
274c      end do
275c
276c
277c      return
278c      end
279c
280
281
282
283
284*     ***********************************
285*     *                                 *
286*     *    D3dB_c_timereverse           *
287*     *                                 *
288*     ***********************************
289
290      subroutine D3dB_c_timereverse(nb,A,tmp1,tmp2)
291
292*****************************************************
293*                                                   *
294*      This routine performs the operation          *
295*            A(i,j,k) <- conjugate(A(i,-j,-k))      *
296*                                                   *
297*      np = the number of worker nodes              *
298*      proc#=0...(np-1)                             *
299*                                                   *
300*****************************************************
301      implicit none
302      integer     nb
303      complex*16  A(*)
304      complex*16  tmp1(*)
305      complex*16  tmp2(*)
306
307#include "bafdecls.fh"
308
309#include "D3dB.fh"
310
311
312*     **** indexing variables ****
313c     integer iq_to_i1(2**NFFT2*NSLABS)
314c     integer iq_to_i2(2**NFFT2*NSLABS)
315c     integer i1_start(NFFT3+1)
316c     integer i2_start(NFFT3+1)
317      integer iq_to_i1(2,NBLOCKS)
318      integer iq_to_i2(2,NBLOCKS)
319      integer i1_start(2,NBLOCKS)
320      integer i2_start(2,NBLOCKS)
321      common / timereverse_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
322
323*     **** Used to avoid asynchronous communications ****
324      integer Nchannels(NBLOCKS)
325      integer channel_proc(2,NBLOCKS)
326      integer channel_type(2,NBLOCKS)
327      common / channel_blk / channel_proc,channel_type,Nchannels
328
329#include "tcgmsg.fh"
330#include "msgtypesf.h"
331      integer  rcv_len,rcv_proc
332
333*     **** local variables ***
334      integer i,c
335      integer it
336      integer source
337      integer msglen
338      integer pfrom,pto
339      integer taskid,np
340      integer index1,index2
341
342*     **** external functions ****
343      integer  Parallel2d_convert_taskid_i
344      external Parallel2d_convert_taskid_i
345
346      call Parallel2d_taskid_i(taskid)
347      call Parallel2d_np_i(np)
348
349*     **** pack A(i) array ****
350      do index1=int_mb(i1_start(1,nb)+1-1),
351     >         (int_mb(i1_start(1,nb)+np+1-1)-1)
352         tmp1(index1) = A(int_mb(iq_to_i1(1,nb)+index1-1))
353      end do
354
355*     **** it = 0, transpose data on same thread ****
356      msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1)
357      do i=1,msglen
358         tmp2(int_mb(i2_start(1,nb)+1-1)+i-1)
359     > = tmp1(int_mb(i1_start(1,nb)+1-1)+i-1)
360      end do
361
362
363      do c=1,Nchannels(nb)
364*        **** receive packed array data ****
365         if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then
366            pfrom=int_mb(channel_proc(1,nb)+c-1)
367            it = mod((taskid+np-pfrom),np)
368
369            source=pfrom
370            msglen = (int_mb(i2_start(1,nb)+it+2-1)
371     >             -  int_mb(i2_start(1,nb)+it+1-1))
372
373            if (msglen.gt.0) then
374               call RCV(9+MSGDBL,
375     >                  tmp2(int_mb(i2_start(1,nb)+it+1-1)),
376     >                  mdtob(2*msglen),rcv_len,
377     >                  Parallel2d_convert_taskid_i(source),
378     >                  rcv_proc,1)
379            end if
380         end if
381
382*        **** send packed array to other processors ****
383         if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then
384            pto=int_mb(channel_proc(1,nb)+c-1)
385            it = mod((pto-taskid+np),np)
386
387            msglen    = (int_mb(i1_start(1,nb)+it+2-1)
388     >                - int_mb(i1_start(1,nb)+it+1-1))
389
390            if (msglen.gt.0) then
391               call SND(9+MSGDBL,
392     >                  tmp1(int_mb(i1_start(1,nb)+it+1-1)),
393     >                  mdtob(2*msglen),
394     >                  Parallel2d_convert_taskid_i(pto),
395     >                  1)
396            end if
397         end if
398
399      end do
400
401*     **** unpack A(i) array ****
402      do index2=int_mb(i2_start(1,nb)+1-1),
403     >         (int_mb(i2_start(1,nb)+np+1-1)-1)
404        A(int_mb(iq_to_i2(1,nb)+index2-1))=dconjg(tmp2(index2))
405      end do
406
407      return
408      end
409
410
411
412*     ***********************************
413*     *					*
414*     *	   D3dB_c_transpose_ijk		*
415*     *					*
416*     ***********************************
417
418      subroutine D3dB_c_transpose_ijk(nb,op,A,tmp1,tmp2)
419
420*****************************************************
421*                                                   *
422*      This routine performs the operation          *
423*               A(i,k,j) <- A(i,j,k)                *
424*                                                   *
425*      np = the number of worker nodes              *
426*      proc#=0...(np-1)
427*                                                   *
428*       this transpose uses more buffer space       *
429*       then transpose2                             *
430*****************************************************
431      implicit none
432      integer     nb,op
433      complex*16  A(*)
434      complex*16  tmp1(*),tmp2(*)
435
436#include "bafdecls.fh"
437
438#include "D3dB.fh"
439
440
441*     **** indexing variables ****
442      integer h_iq_to_i1(2,6,NBLOCKS)
443      integer h_iq_to_i2(2,6,NBLOCKS)
444      integer h_i1_start(2,6,NBLOCKS)
445      integer h_i2_start(2,6,NBLOCKS)
446      common / trans_blk_ijk / h_iq_to_i1,
447     >                         h_iq_to_i2,
448     >                         h_i1_start,
449     >                         h_i2_start
450
451*     **** Used to avoid asynchronous communications ****
452      integer Nchannels(NBLOCKS)
453      integer channel_proc(2,NBLOCKS)
454      integer channel_type(2,NBLOCKS)
455      common / channel_blk / channel_proc,channel_type,Nchannels
456
457#include "tcgmsg.fh"
458#include "msgtypesf.h"
459      integer  rcv_len,rcv_proc
460
461*     **** local variables ***
462      integer i,c,nnfft3d
463      integer it
464      integer source
465      integer msglen
466      integer pfrom,pto
467      integer taskid,np
468
469*     **** external functions ****
470      integer  Parallel2d_convert_taskid_i
471      external Parallel2d_convert_taskid_i
472
473      call Parallel2d_taskid_i(taskid)
474      call Parallel2d_np_i(np)
475
476
477*     **** pack A(i) array ****
478      if ((op.eq.1).or.(op.eq.5)) nnfft3d = (nx(nb)/2+1)*nq1(nb)
479      if ((op.eq.2).or.(op.eq.4)) nnfft3d = (ny(nb))    *nq2(nb)
480      if ((op.eq.3).or.(op.eq.6)) nnfft3d = (nz(nb))    *nq3(nb)
481#ifndef CRAY
482!DIR$ ivdep
483#endif
484      do i=1,nnfft3d
485         tmp1(int_mb(h_iq_to_i1(1,op,nb)+i-1)) = A(i)
486      end do
487
488*     **** it = 0, transpose data on same thread ****
489      msglen = int_mb(h_i2_start(1,op,nb)+2-1)
490     >       - int_mb(h_i2_start(1,op,nb)+1-1)
491#ifndef CRAY
492!DIR$ ivdep
493#endif
494      do i=1,msglen
495         tmp2(int_mb(h_i2_start(1,op,nb)+1-1)+i-1)
496     > = tmp1(int_mb(h_i1_start(1,op,nb)+1-1)+i-1)
497      end do
498
499
500      do c=1,Nchannels(nb)
501*        **** receive packed array data ****
502         if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then
503            pfrom=int_mb(channel_proc(1,nb)+c-1)
504            it = mod((taskid+np-pfrom),np)
505
506            source=pfrom
507            msglen = (int_mb(h_i2_start(1,op,nb)+it+2-1)
508     >             -  int_mb(h_i2_start(1,op,nb)+it+1-1))
509
510            if (msglen.gt.0) then
511               call RCV(9+MSGDBL,
512     >                  tmp2(int_mb(h_i2_start(1,op,nb)+it+1-1)),
513     >                  mdtob(2*msglen),rcv_len,
514     >                  Parallel2d_convert_taskid_i(source),
515     >                  rcv_proc,1)
516            end if
517         end if
518
519*        **** send packed array to other processors ****
520         if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then
521            pto=int_mb(channel_proc(1,nb)+c-1)
522            it = mod((pto-taskid+np),np)
523
524            msglen    = (int_mb(h_i1_start(1,op,nb)+it+2-1)
525     >                -  int_mb(h_i1_start(1,op,nb)+it+1-1))
526
527            if (msglen.gt.0) then
528               call SND(9+MSGDBL,
529     >                  tmp1(int_mb(h_i1_start(1,op,nb)+it+1-1)),
530     >                  mdtob(2*msglen),
531     >                  Parallel2d_convert_taskid_i(pto),1)
532            end if
533         end if
534
535      end do
536
537
538*     **** unpack A(i) array ****
539      if ((op.eq.4).or.(op.eq.6)) nnfft3d = (nx(nb)/2+1)*nq1(nb)
540      if ((op.eq.1).or.(op.eq.3)) nnfft3d = (ny(nb))    *nq2(nb)
541      if ((op.eq.2).or.(op.eq.5)) nnfft3d = (nz(nb))    *nq3(nb)
542#ifndef CRAY
543!DIR$ ivdep
544#endif
545      do i=1,nnfft3d
546         A(i) = tmp2(int_mb(h_iq_to_i2(1,op,nb)+i-1))
547      end do
548
549
550      return
551      end
552
553
554
555*     ***********************************
556*     *					*
557*     *	   D3dB_t_transpose_ijk		*
558*     *					*
559*     ***********************************
560
561      subroutine D3dB_t_transpose_ijk(nb,op,A,tmp1,tmp2)
562
563*****************************************************
564*                                                   *
565*      This routine performs the operation          *
566*               A(i,k,j) <- A(i,j,k)                *
567*                                                   *
568*      np = the number of worker nodes              *
569*      proc#=0...(np-1)
570*                                                   *
571*       this transpose uses more buffer space       *
572*       then transpose2                             *
573*****************************************************
574      implicit none
575      integer nb,op
576      real*8  A(*)
577      real*8  tmp1(*),tmp2(*)
578
579#include "bafdecls.fh"
580#include "D3dB.fh"
581
582
583*     **** indexing variables ****
584      integer h_iq_to_i1(2,6,NBLOCKS)
585      integer h_iq_to_i2(2,6,NBLOCKS)
586      integer h_i1_start(2,6,NBLOCKS)
587      integer h_i2_start(2,6,NBLOCKS)
588      common / trans_blk_ijk / h_iq_to_i1,
589     >                         h_iq_to_i2,
590     >                         h_i1_start,
591     >                         h_i2_start
592
593*     **** Used to avoid asynchronous communications ****
594      integer Nchannels(NBLOCKS)
595      integer channel_proc(2,NBLOCKS)
596      integer channel_type(2,NBLOCKS)
597      common / channel_blk / channel_proc,channel_type,Nchannels
598
599#include "tcgmsg.fh"
600#include "msgtypesf.h"
601      integer  rcv_len,rcv_proc
602
603*     **** local variables ***
604      integer i,c,nnfft3d
605      integer it
606      integer source
607      integer msglen
608      integer pfrom,pto
609      integer taskid,np
610
611*     **** external functions ****
612      integer  Parallel2d_convert_taskid_i
613      external Parallel2d_convert_taskid_i
614
615      call Parallel2d_taskid_i(taskid)
616      call Parallel2d_np_i(np)
617
618
619*     **** pack A(i) array ****
620      if ((op.eq.1).or.(op.eq.5)) nnfft3d = (nx(nb)/2+1)*nq1(nb)
621      if ((op.eq.2).or.(op.eq.4)) nnfft3d = (ny(nb))    *nq2(nb)
622      if ((op.eq.3).or.(op.eq.6)) nnfft3d = (nz(nb))    *nq3(nb)
623#ifndef CRAY
624!DIR$ ivdep
625#endif
626      do i=1,nnfft3d
627         tmp1(int_mb(h_iq_to_i1(1,op,nb)+i-1)) = A(i)
628      end do
629
630*     **** it = 0, transpose data on same thread ****
631      msglen = int_mb(h_i2_start(1,op,nb)+2-1)
632     >       - int_mb(h_i2_start(1,op,nb)+1-1)
633#ifndef CRAY
634!DIR$ ivdep
635#endif
636      do i=1,msglen
637         tmp2(int_mb(h_i2_start(1,op,nb)+1-1)+i-1)
638     > = tmp1(int_mb(h_i1_start(1,op,nb)+1-1)+i-1)
639      end do
640
641
642      do c=1,Nchannels(nb)
643*        **** receive packed array data ****
644         if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then
645            pfrom=int_mb(channel_proc(1,nb)+c-1)
646            it = mod((taskid+np-pfrom),np)
647
648            source=pfrom
649            msglen = (int_mb(h_i2_start(1,op,nb)+it+2-1)
650     >             -  int_mb(h_i2_start(1,op,nb)+it+1-1))
651
652            if (msglen.gt.0) then
653               call RCV(9+MSGDBL,
654     >                  tmp2(int_mb(h_i2_start(1,op,nb)+it+1-1)),
655     >                  mdtob(msglen),rcv_len,
656     >                  Parallel2d_convert_taskid_i(source),
657     >                  rcv_proc,1)
658            end if
659         end if
660
661*        **** send packed array to other processors ****
662         if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then
663            pto=int_mb(channel_proc(1,nb)+c-1)
664            it = mod((pto-taskid+np),np)
665
666            msglen    = (int_mb(h_i1_start(1,op,nb)+it+2-1)
667     >                -  int_mb(h_i1_start(1,op,nb)+it+1-1))
668
669            if (msglen.gt.0) then
670               call SND(9+MSGDBL,
671     >                  tmp1(int_mb(h_i1_start(1,op,nb)+it+1-1)),
672     >                  mdtob(msglen),
673     >                  Parallel2d_convert_taskid_i(pto),
674     >                  1)
675            end if
676         end if
677
678      end do
679
680
681*     **** unpack A(i) array ****
682      if ((op.eq.4).or.(op.eq.6)) nnfft3d = (nx(nb)/2+1)*nq1(nb)
683      if ((op.eq.1).or.(op.eq.3)) nnfft3d = (ny(nb))    *nq2(nb)
684      if ((op.eq.2).or.(op.eq.5)) nnfft3d = (nz(nb))    *nq3(nb)
685#ifndef CRAY
686!DIR$ ivdep
687#endif
688      do i=1,nnfft3d
689         A(i) = tmp2(int_mb(h_iq_to_i2(1,op,nb)+i-1))
690      end do
691
692
693      return
694      end
695
696
697*     ***********************************
698*     *                                 *
699*     *         D3dB_SumAll             *
700*     *                                 *
701*     ***********************************
702
703      subroutine D3dB_SumAll(sum)
704c     implicit none
705      real*8  sum
706
707#include "tcgmsg.fh"
708#include "msgtypesf.h"
709
710*     **** local variables ****
711      integer np_i
712
713*     **** external functions ****
714      integer  Parallel2d_comm_i
715      external Parallel2d_comm_i
716
717      call Parallel2d_np_i(np_i)
718      if (np_i.gt.1) then
719         call GA_PGROUP_DGOP(Parallel2d_comm_i(),
720     >                       9+MSGDBL,sum,1,'+')
721      end if
722
723      return
724      end
725
726
727
728*     ***********************************
729*     *                                 *
730*     *         D3dB_Vector_SumAll      *
731*     *                                 *
732*     ***********************************
733
734      subroutine D3dB_Vector_SumAll(n,sum)
735c     implicit none
736      integer n
737      real*8  sum(*)
738
739#include "bafdecls.fh"
740#include "tcgmsg.fh"
741#include "msgtypesf.h"
742#include "errquit.fh"
743
744
745*     **** temporary workspace ****
746      integer np_i
747
748*     **** external functions ****
749      integer  Parallel2d_comm_i
750      external Parallel2d_comm_i
751
752      call nwpw_timing_start(2)
753
754      call Parallel2d_np_i(np_i)
755      if (np_i.gt.1) then
756         call GA_PGROUP_DGOP(Parallel2d_comm_i(),
757     >                       9+MSGDBL,sum,n,'+')
758      end if
759
760      call nwpw_timing_end(2)
761
762      return
763      end
764
765
766
767
768*     ***********************************
769*     *                                 *
770*     *         D3dB_ISumAll            *
771*     *                                 *
772*     ***********************************
773
774      subroutine D3dB_ISumAll(sum)
775c     implicit none
776      integer  sum
777
778#include "tcgmsg.fh"
779#include "msgtypesf.h"
780
781*     **** local variables ****
782      integer np_i
783
784*     **** external functions ****
785      integer  Parallel2d_comm_i
786      external Parallel2d_comm_i
787
788
789      call Parallel2d_np_i(np_i)
790      if (np_i.gt.1) then
791         call GA_PGROUP_IGOP(Parallel2d_comm_i(),
792     >                       9+MSGINT,sum,1,'+')
793      end if
794
795      return
796      end
797
798*     ***********************************
799*     *                                 *
800*     *         D3dB_Vector_ISumAll     *
801*     *                                 *
802*     ***********************************
803
804      subroutine D3dB_Vector_ISumAll(n,sum)
805c     implicit none
806      integer n
807      integer  sum(*)
808
809#include "bafdecls.fh"
810#include "errquit.fh"
811#include "tcgmsg.fh"
812#include "msgtypesf.h"
813
814*     **** local variables ****
815      integer np_i
816
817*     **** external functions ****
818      integer  Parallel2d_comm_i
819      external Parallel2d_comm_i
820
821      call nwpw_timing_start(2)
822
823      call Parallel2d_np_i(np_i)
824      if (np_i.gt.1) then
825         call GA_PGROUP_IGOP(Parallel2d_comm_i(),
826     >                       9+MSGINT,sum,n,'+')
827      end if
828
829      call nwpw_timing_end(2)
830
831      return
832      end
833
834
835
836
837
838
839*     ***********************************
840*     *					*
841*     *	   D3dB_c_ptranspose1_jk	*
842*     *					*
843*     ***********************************
844
845      subroutine D3dB_c_ptranspose1_jk(nbb,A,tmp1,tmp2)
846
847*****************************************************
848*                                                   *
849*      This routine performs the operation          *
850*               A(i,k,j) <- A(i,j,k)                *
851*                                                   *
852*      np = the number of worker nodes              *
853*      proc#=0...(np-1)
854*                                                   *
855*       this transpose uses more buffer space       *
856*       then transpose2                             *
857*****************************************************
858      implicit none
859      integer     nbb
860      complex*16  A(*)
861      complex*16  tmp1(*),tmp2(*)
862
863#include "bafdecls.fh"
864#include "D3dB.fh"
865
866*     **** indexing variables ****
867      integer iq_to_i1(2,0:3)
868      integer iq_to_i2(2,0:3)
869      integer iz_to_i2(2,0:3)
870      integer i1_start(2,0:3)
871      integer i2_start(2,0:3)
872      common / ptrans_blk1 / iq_to_i1,iq_to_i2,iz_to_i2,
873     >                       i1_start,i2_start
874
875
876
877*     **** Used to avoid asynchronous communications ****
878      integer Nchannels(NBLOCKS)
879      integer channel_proc(2,NBLOCKS)
880      integer channel_type(2,NBLOCKS)
881      common / channel_blk / channel_proc,channel_type,Nchannels
882
883#include "tcgmsg.fh"
884#include "msgtypesf.h"
885      integer  rcv_len,rcv_proc
886
887*     **** local variables ***
888      integer c,it
889      integer source
890      integer msglen
891      integer pfrom,pto
892      integer taskid,np
893      integer n1,n2,id
894
895*     **** external functions ****
896      integer  Parallel2d_convert_taskid_i
897      external Parallel2d_convert_taskid_i
898
899
900      id = 2*(nbb/2) + 1
901
902      call Parallel2d_taskid_i(taskid)
903      call Parallel2d_np_i(np)
904
905
906      n1 = int_mb(i1_start(1,nbb)+np) - 1
907      n2 = int_mb(i2_start(1,nbb)+np) - 1
908
909*     **** pack A(i) array ****
910      call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1)
911
912
913*     **** it = 0, transpose data on same thread ****
914      msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1)
915      call dcopy(2*msglen,
916     >           tmp1(int_mb(i1_start(1,nbb)+1-1)),1,
917     >           tmp2(int_mb(i2_start(1,nbb)+1-1)),1)
918
919
920      do c=1,Nchannels(1)
921*        **** receive packed array data ****
922         if (int_mb(channel_type(1,1)+c-1) .eq. 1) then
923            pfrom=int_mb(channel_proc(1,1)+c-1)
924            it = mod((taskid+np-pfrom),np)
925
926            source=pfrom
927            msglen = (int_mb(i2_start(1,nbb)+it+2-1)
928     >             -  int_mb(i2_start(1,nbb)+it+1-1))
929
930            if (msglen.gt.0) then
931               call RCV(9+MSGDBL,
932     >                  tmp2(int_mb(i2_start(1,nbb)+it+1-1)),
933     >                  mdtob(2*msglen),rcv_len,
934     >                  Parallel2d_convert_taskid_i(source),
935     >                  rcv_proc,1)
936            end if
937         end if
938
939*        **** send packed array to other processors ****
940         if (int_mb(channel_type(1,1)+c-1) .eq. 0) then
941            pto=int_mb(channel_proc(1,1)+c-1)
942            it = mod((pto-taskid+np),np)
943
944            msglen    = (int_mb(i1_start(1,nbb)+it+2-1)
945     >                - int_mb(i1_start(1,nbb)+it+1-1))
946
947            if (msglen.gt.0) then
948               call SND(9+MSGDBL,
949     >                  tmp1(int_mb(i1_start(1,nbb)+it+1-1)),
950     >                  mdtob(2*msglen),
951     >                  Parallel2d_convert_taskid_i(pto),
952     >                  1)
953            end if
954         end if
955
956      end do
957
958
959*     **** unpack A(i) array ****
960      call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A)
961      call D3dB_pfft_index2_zero(nfft3d(id)-n2,
962     >                           int_mb(iz_to_i2(1,nbb)),A)
963
964
965      return
966      end
967
968
969
970*     ***********************************
971*     *					*
972*     *	   D3dB_c_ptranspose2_jk	*
973*     *					*
974*     ***********************************
975
976      subroutine D3dB_c_ptranspose2_jk(nbb,A,tmp1,tmp2)
977
978*****************************************************
979*                                                   *
980*      This routine performs the operation          *
981*               A(i,k,j) <- A(i,j,k)                *
982*                                                   *
983*      np = the number of worker nodes              *
984*      proc#=0...(np-1)
985*                                                   *
986*       this transpose uses more buffer space       *
987*       then transpose2                             *
988*****************************************************
989      implicit none
990      integer     nbb
991      complex*16  A(*)
992      complex*16  tmp1(*),tmp2(*)
993
994#include "bafdecls.fh"
995#include "D3dB.fh"
996
997*     **** indexing variables ****
998      integer iq_to_i1(2,0:3)
999      integer iq_to_i2(2,0:3)
1000      integer iz_to_i2(2,0:3)
1001      integer i1_start(2,0:3)
1002      integer i2_start(2,0:3)
1003      common / ptrans_blk2 / iq_to_i1,iq_to_i2,iz_to_i2,
1004     >                       i1_start,i2_start
1005
1006
1007
1008*     **** Used to avoid asynchronous communications ****
1009      integer Nchannels(NBLOCKS)
1010      integer channel_proc(2,NBLOCKS)
1011      integer channel_type(2,NBLOCKS)
1012      common / channel_blk / channel_proc,channel_type,Nchannels
1013
1014#include "tcgmsg.fh"
1015#include "msgtypesf.h"
1016      integer  rcv_len,rcv_proc
1017
1018*     **** local variables ***
1019      integer c,it
1020      integer source
1021      integer msglen
1022      integer pfrom,pto
1023      integer taskid,np
1024      integer n1,n2,id
1025
1026*     **** external functions ****
1027      integer  Parallel2d_convert_taskid_i
1028      external Parallel2d_convert_taskid_i
1029
1030      id = 2*(nbb/2) + 1
1031
1032      call Parallel2d_taskid_i(taskid)
1033      call Parallel2d_np_i(np)
1034
1035
1036      n1 = int_mb(i1_start(1,nbb)+np) - 1
1037      n2 = int_mb(i2_start(1,nbb)+np) - 1
1038
1039*     **** pack A(i) array ****
1040      call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1)
1041
1042
1043*     **** it = 0, transpose data on same thread ****
1044      msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1)
1045      call dcopy(2*msglen,
1046     >           tmp1(int_mb(i1_start(1,nbb)+1-1)),1,
1047     >           tmp2(int_mb(i2_start(1,nbb)+1-1)),1)
1048
1049
1050      do c=1,Nchannels(1)
1051*        **** receive packed array data ****
1052         if (int_mb(channel_type(1,1)+c-1) .eq. 1) then
1053            pfrom=int_mb(channel_proc(1,1)+c-1)
1054            it = mod((taskid+np-pfrom),np)
1055
1056            source=pfrom
1057            msglen = (int_mb(i2_start(1,nbb)+it+2-1)
1058     >             -  int_mb(i2_start(1,nbb)+it+1-1))
1059
1060            if (msglen.gt.0) then
1061               call RCV(9+MSGDBL,
1062     >                  tmp2(int_mb(i2_start(1,nbb)+it+1-1)),
1063     >                  mdtob(2*msglen),rcv_len,
1064     >                  Parallel2d_convert_taskid_i(source),
1065     >                  rcv_proc,1)
1066            end if
1067         end if
1068
1069*        **** send packed array to other processors ****
1070         if (int_mb(channel_type(1,1)+c-1) .eq. 0) then
1071            pto=int_mb(channel_proc(1,1)+c-1)
1072            it = mod((pto-taskid+np),np)
1073
1074            msglen    = (int_mb(i1_start(1,nbb)+it+2-1)
1075     >                - int_mb(i1_start(1,nbb)+it+1-1))
1076
1077            if (msglen.gt.0) then
1078               call SND(9+MSGDBL,
1079     >                  tmp1(int_mb(i1_start(1,nbb)+it+1-1)),
1080     >                  mdtob(2*msglen),
1081     >                  Parallel2d_convert_taskid_i(pto),
1082     >                  1)
1083            end if
1084         end if
1085
1086      end do
1087
1088*     **** unpack A(i) array ****
1089      call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A)
1090      call D3dB_pfft_index2_zero(nfft3d(id)-n2,
1091     >                           int_mb(iz_to_i2(1,nbb)),A)
1092
1093      return
1094      end
1095
1096
1097
1098*     ***********************************
1099*     *                                 *
1100*     *    D3dB_c_timereverse_start     *
1101*     *                                 *
1102*     ***********************************
1103*
1104*      This routine performs the operation
1105*            A(i,j,k) <- conjugate(A(i,-j,-k))
1106*
1107*      np = the number of worker nodes
1108*      proc#=0...(np-1)
1109*
1110      subroutine D3dB_c_timereverse_start(nb,A,tmp1,tmp2,request,reqcnt)
1111      implicit none
1112      integer     nb
1113      complex*16  A(*)
1114      complex*16  tmp1(*)
1115      complex*16  tmp2(*)
1116      integer     request(*),reqcnt
1117
1118#include "bafdecls.fh"
1119#include "D3dB.fh"
1120
1121
1122*     **** indexing variables ****
1123c     integer iq_to_i1(2**NFFT2*NSLABS)
1124c     integer iq_to_i2(2**NFFT2*NSLABS)
1125c     integer i1_start(NFFT3+1)
1126c     integer i2_start(NFFT3+1)
1127      integer iq_to_i1(2,NBLOCKS)
1128      integer iq_to_i2(2,NBLOCKS)
1129      integer i1_start(2,NBLOCKS)
1130      integer i2_start(2,NBLOCKS)
1131      common / timereverse_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
1132
1133*     **** Used to avoid asynchronous communications ****
1134      integer Nchannels(NBLOCKS)
1135      integer channel_proc(2,NBLOCKS)
1136      integer channel_type(2,NBLOCKS)
1137      common / channel_blk / channel_proc,channel_type,Nchannels
1138
1139#include "tcgmsg.fh"
1140#include "msgtypesf.h"
1141      integer  rcv_len,rcv_proc
1142
1143*     **** local variables ***
1144      integer i,c
1145      integer it
1146      integer source
1147      integer msglen
1148      integer pfrom,pto
1149      integer taskid,np
1150      integer index1
1151
1152*     **** external functions ****
1153      integer  Parallel2d_convert_taskid_i
1154      external Parallel2d_convert_taskid_i
1155
1156
1157      call Parallel2d_taskid_i(taskid)
1158      call Parallel2d_np_i(np)
1159
1160*     **** pack A(i) array ****
1161      do index1=int_mb(i1_start(1,nb)+1-1),
1162     >         (int_mb(i1_start(1,nb)+np+1-1)-1)
1163         tmp1(index1) = A(int_mb(iq_to_i1(1,nb)+index1-1))
1164      end do
1165
1166*     **** it = 0, transpose data on same thread ****
1167      msglen = int_mb(i2_start(1,nb)+2-1) - int_mb(i2_start(1,nb)+1-1)
1168      do i=1,msglen
1169         tmp2(int_mb(i2_start(1,nb)+1-1)+i-1)
1170     > = tmp1(int_mb(i1_start(1,nb)+1-1)+i-1)
1171      end do
1172
1173
1174      do c=1,Nchannels(nb)
1175*        **** receive packed array data ****
1176         if (int_mb(channel_type(1,nb)+c-1) .eq. 1) then
1177            pfrom=int_mb(channel_proc(1,nb)+c-1)
1178            it = mod((taskid+np-pfrom),np)
1179
1180            source=pfrom
1181            msglen = (int_mb(i2_start(1,nb)+it+2-1)
1182     >             -  int_mb(i2_start(1,nb)+it+1-1))
1183
1184            if (msglen.gt.0) then
1185               call RCV(9+MSGDBL,
1186     >                  tmp2(int_mb(i2_start(1,nb)+it+1-1)),
1187     >                  mdtob(2*msglen),rcv_len,
1188     >                  Parallel2d_convert_taskid_i(source),
1189     >                  rcv_proc,1)
1190            end if
1191         end if
1192
1193*        **** send packed array to other processors ****
1194         if (int_mb(channel_type(1,nb)+c-1) .eq. 0) then
1195            pto=int_mb(channel_proc(1,nb)+c-1)
1196            it = mod((pto-taskid+np),np)
1197
1198            msglen    = (int_mb(i1_start(1,nb)+it+2-1)
1199     >                - int_mb(i1_start(1,nb)+it+1-1))
1200
1201            if (msglen.gt.0) then
1202               call SND(9+MSGDBL,
1203     >                  tmp1(int_mb(i1_start(1,nb)+it+1-1)),
1204     >                  mdtob(2*msglen),
1205     >                  Parallel2d_convert_taskid_i(pto),1)
1206            end if
1207         end if
1208
1209      end do
1210
1211
1212      return
1213      end
1214
1215
1216
1217*     ***********************************
1218*     *                                 *
1219*     *    D3dB_c_timereverse_end       *
1220*     *                                 *
1221*     ***********************************
1222*
1223*      This routine performs the operation
1224*            A(i,j,k) <- conjugate(A(i,-j,-k))
1225*
1226*      np = the number of worker nodes
1227*      proc#=0...(np-1)
1228*
1229      subroutine D3dB_c_timereverse_end(nb,A,tmp1,tmp2,request,reqcnt)
1230      implicit none
1231      integer     nb
1232      complex*16  A(*)
1233      complex*16  tmp1(*)
1234      complex*16  tmp2(*)
1235      integer     request(*),reqcnt
1236
1237#include "bafdecls.fh"
1238#include "errquit.fh"
1239
1240
1241*     **** indexing variables ****
1242      integer iq_to_i1(2,NBLOCKS)
1243      integer iq_to_i2(2,NBLOCKS)
1244      integer i1_start(2,NBLOCKS)
1245      integer i2_start(2,NBLOCKS)
1246      common / timereverse_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
1247
1248
1249*     **** local variables ***
1250      integer np
1251      integer index2
1252
1253
1254      call Parallel2d_np_i(np)
1255
1256*     **** unpack A(i) array ****
1257#ifndef CRAY
1258!DIR$ ivdep
1259#endif
1260      do index2=int_mb(i2_start(1,nb)+1-1),
1261     >         (int_mb(i2_start(1,nb)+np+1-1)-1)
1262        A(int_mb(iq_to_i2(1,nb)+index2-1))=dconjg(tmp2(index2))
1263      end do
1264
1265      return
1266      end
1267
1268
1269*     ************************************
1270*     *                                  *
1271*     *         Balance_c_balance_start  *
1272*     *                                  *
1273*     ************************************
1274
1275      subroutine Balance_c_balance_start(nb,A,request,reqcnt,msgtype)
1276      implicit none
1277      integer nb
1278      complex*16 A(*)
1279      integer    request(*),reqcnt,msgtype
1280
1281      call Balance_c_balance(nb,A)
1282      return
1283      end
1284
1285*     ************************************
1286*     *                                  *
1287*     *         Balance_c_balance_end    *
1288*     *                                  *
1289*     ************************************
1290*
1291      subroutine Balance_c_balance_end(nb,A,request,reqcnt)
1292      implicit none
1293      integer nb
1294      complex*16 A(*)
1295      integer    request(*),reqcnt
1296
1297*     *** dummy routine ***
1298      return
1299      end
1300
1301*     ************************************
1302*     *                                  *
1303*     *    Balance_c_unbalance_start     *
1304*     *                                  *
1305*     ************************************
1306
1307      subroutine Balance_c_unbalance_start(nb,A,request,reqcnt,msgtype)
1308      implicit none
1309      integer nb
1310      complex*16 A(*)
1311      integer    request(*),reqcnt,msgtype
1312
1313      call Balance_c_unbalance(nb,A)
1314      return
1315      end
1316
1317*     ************************************
1318*     *                                  *
1319*     *       Balance_c_unbalance_end    *
1320*     *                                  *
1321*     ************************************
1322
1323      subroutine Balance_c_unbalance_end(nb,A,request,reqcnt)
1324      implicit none
1325      integer nb
1326      complex*16 A(*)
1327      integer    request(*),reqcnt
1328
1329*     *** dummy routine ***
1330      return
1331      end
1332
1333
1334
1335
1336*     ***********************************
1337*     *					*
1338*     *	   D3dB_c_ptranspose1_jk_start	*
1339*     *					*
1340*     ***********************************
1341
1342*
1343*      This routine performs the operation
1344*               A(i,k,j) <- A(i,j,k)
1345*
1346*      np = the number of worker nodes
1347*      proc#=0...(np-1)
1348*
1349*       this transpose uses more buffer space
1350*       then transpose2
1351*
1352
1353      subroutine D3dB_c_ptranspose1_jk_start(nbb,A,tmp1,tmp2,
1354     >                                       request,reqcnt,msgtype)
1355      implicit none
1356      integer nbb
1357      complex*16  A(*)
1358      complex*16  tmp1(*),tmp2(*)
1359      integer request(*),reqcnt,msgtype
1360
1361#include "bafdecls.fh"
1362#include "errquit.fh"
1363#include "D3dB.fh"
1364
1365*     **** indexing variables ****
1366      integer iq_to_i1(2,0:3)
1367      integer iq_to_i2(2,0:3)
1368      integer iz_to_i2(2,0:3)
1369      integer i1_start(2,0:3)
1370      integer i2_start(2,0:3)
1371      common / ptrans_blk1 / iq_to_i1,iq_to_i2,iz_to_i2,
1372     >                       i1_start,i2_start
1373
1374*     **** Used to avoid asynchronous communications ****
1375      integer Nchannels(NBLOCKS)
1376      integer channel_proc(2,NBLOCKS)
1377      integer channel_type(2,NBLOCKS)
1378      common / channel_blk / channel_proc,channel_type,Nchannels
1379
1380#include "tcgmsg.fh"
1381#include "msgtypesf.h"
1382      integer  rcv_len,rcv_proc
1383
1384
1385*     **** local variables ***
1386      integer c,it
1387      integer source
1388      integer msglen
1389      integer pfrom,pto
1390      integer taskid,np
1391      integer n1
1392
1393*     **** external functions ****
1394      integer  Parallel2d_convert_taskid_i
1395      external Parallel2d_convert_taskid_i
1396
1397
1398      call Parallel2d_taskid_i(taskid)
1399      call Parallel2d_np_i(np)
1400
1401
1402
1403      n1 = int_mb(i1_start(1,nbb)+np) - 1
1404
1405*     **** pack A(i) array ****
1406       call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1)
1407
1408*     **** it = 0, transpose data on same thread ****
1409      msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1)
1410      call dcopy(2*msglen,
1411     >           tmp1(int_mb(i1_start(1,nbb)+1-1)),1,
1412     >           tmp2(int_mb(i2_start(1,nbb)+1-1)),1)
1413
1414
1415      do c=1,Nchannels(1)
1416*        **** receive packed array data ****
1417         if (int_mb(channel_type(1,1)+c-1) .eq. 1) then
1418            pfrom=int_mb(channel_proc(1,1)+c-1)
1419            it = mod((taskid+np-pfrom),np)
1420
1421            source=pfrom
1422            msglen = (int_mb(i2_start(1,nbb)+it+2-1)
1423     >             -  int_mb(i2_start(1,nbb)+it+1-1))
1424
1425            if (msglen.gt.0) then
1426               call RCV(9+MSGDBL,
1427     >                  tmp2(int_mb(i2_start(1,nbb)+it+1-1)),
1428     >                  mdtob(2*msglen),rcv_len,
1429     >                  Parallel2d_convert_taskid_i(source),
1430     >                  rcv_proc,1)
1431            end if
1432         end if
1433
1434*        **** send packed array to other processors ****
1435         if (int_mb(channel_type(1,1)+c-1) .eq. 0) then
1436            pto=int_mb(channel_proc(1,1)+c-1)
1437            it = mod((pto-taskid+np),np)
1438
1439            msglen    = (int_mb(i1_start(1,nbb)+it+2-1)
1440     >                - int_mb(i1_start(1,nbb)+it+1-1))
1441
1442            if (msglen.gt.0) then
1443               call SND(9+MSGDBL,
1444     >                  tmp1(int_mb(i1_start(1,nbb)+it+1-1)),
1445     >                  mdtob(2*msglen),
1446     >                  Parallel2d_convert_taskid_i(pto),1)
1447            end if
1448         end if
1449
1450      end do
1451
1452      return
1453      end
1454
1455
1456*     ***********************************
1457*     *					*
1458*     *	   D3dB_c_ptranspose1_jk_end	*
1459*     *					*
1460*     ***********************************
1461
1462*
1463*      This routine performs the operation
1464*               A(i,k,j) <- A(i,j,k)
1465*
1466*      np = the number of worker nodes
1467*      proc#=0...(np-1)
1468*
1469*       this transpose uses more buffer space
1470*       then transpose2
1471*
1472
1473      subroutine D3dB_c_ptranspose1_jk_end(nbb,A,tmp2,request,reqcnt)
1474
1475      implicit none
1476      integer nbb
1477      complex*16  A(*)
1478      complex*16  tmp2(*)
1479      integer     request(*),reqcnt
1480
1481#include "bafdecls.fh"
1482#include "errquit.fh"
1483#include "D3dB.fh"
1484
1485
1486*     **** indexing variables ****
1487      integer iq_to_i1(2,0:3)
1488      integer iq_to_i2(2,0:3)
1489      integer iz_to_i2(2,0:3)
1490      integer i1_start(2,0:3)
1491      integer i2_start(2,0:3)
1492      common / ptrans_blk1 / iq_to_i1,iq_to_i2,iz_to_i2,
1493     >                       i1_start,i2_start
1494
1495*     **** local variables ***
1496      integer np,n2,id
1497
1498      id = 2*(nbb/2) + 1
1499
1500      call Parallel2d_np_i(np)
1501
1502*     **** unpack A(i) array ****
1503      n2 = int_mb(i2_start(1,nbb)+np) - 1
1504      call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A)
1505      call D3dB_pfft_index2_zero(nfft3d(id)-n2,
1506     >                           int_mb(iz_to_i2(1,nbb)),A)
1507
1508      return
1509      end
1510
1511
1512
1513*     ***********************************
1514*     *					*
1515*     *	   D3dB_c_ptranspose2_jk_start	*
1516*     *					*
1517*     ***********************************
1518
1519*
1520*      This routine performs the operation
1521*               A(i,k,j) <- A(i,j,k)
1522*
1523*      np = the number of worker nodes
1524*      proc#=0...(np-1)
1525*
1526*       this transpose uses more buffer space
1527*       then transpose2
1528*
1529
1530      subroutine D3dB_c_ptranspose2_jk_start(nbb,A,tmp1,tmp2,
1531     >                                       request,reqcnt,msgtype)
1532      implicit none
1533      integer nbb
1534      complex*16  A(*)
1535      complex*16  tmp1(*),tmp2(*)
1536      integer request(*),reqcnt,msgtype
1537
1538#include "bafdecls.fh"
1539#include "errquit.fh"
1540#include "D3dB.fh"
1541
1542
1543*     **** indexing variables ****
1544      integer iq_to_i1(2,0:3)
1545      integer iq_to_i2(2,0:3)
1546      integer iz_to_i2(2,0:3)
1547      integer i1_start(2,0:3)
1548      integer i2_start(2,0:3)
1549      common / ptrans_blk2 / iq_to_i1,iq_to_i2,iz_to_i2,
1550     >                       i1_start,i2_start
1551
1552
1553
1554*     **** Used to avoid asynchronous communications ****
1555      integer Nchannels(NBLOCKS)
1556      integer channel_proc(2,NBLOCKS)
1557      integer channel_type(2,NBLOCKS)
1558      common / channel_blk / channel_proc,channel_type,Nchannels
1559
1560#include "tcgmsg.fh"
1561#include "msgtypesf.h"
1562      integer  rcv_len,rcv_proc
1563
1564
1565*     **** local variables ***
1566      integer c,it
1567      integer source
1568      integer msglen
1569      integer pfrom,pto
1570      integer taskid,np
1571      integer n1
1572
1573*     **** external functions ****
1574      integer  Parallel2d_convert_taskid_i
1575      external Parallel2d_convert_taskid_i
1576
1577
1578      call Parallel2d_taskid_i(taskid)
1579      call Parallel2d_np_i(np)
1580
1581
1582      n1 = int_mb(i1_start(1,nbb)+np) - 1
1583
1584*     **** pack A(i) array ****
1585       call D3dB_pfft_index1_copy(n1,int_mb(iq_to_i1(1,nbb)),A,tmp1)
1586
1587*     **** it = 0, transpose data on same thread ****
1588      msglen = int_mb(i2_start(1,nbb)+2-1) - int_mb(i2_start(1,nbb)+1-1)
1589      call dcopy(2*msglen,
1590     >           tmp1(int_mb(i1_start(1,nbb)+1-1)),1,
1591     >           tmp2(int_mb(i2_start(1,nbb)+1-1)),1)
1592
1593
1594      do c=1,Nchannels(1)
1595*        **** receive packed array data ****
1596         if (int_mb(channel_type(1,1)+c-1) .eq. 1) then
1597            pfrom=int_mb(channel_proc(1,1)+c-1)
1598            it = mod((taskid+np-pfrom),np)
1599
1600            source=pfrom
1601            msglen = (int_mb(i2_start(1,nbb)+it+2-1)
1602     >             -  int_mb(i2_start(1,nbb)+it+1-1))
1603
1604            if (msglen.gt.0) then
1605               call RCV(9+MSGDBL,
1606     >                  tmp2(int_mb(i2_start(1,nbb)+it+1-1)),
1607     >                  mdtob(2*msglen),rcv_len,
1608     >                  Parallel2d_convert_taskid_i(source),
1609     >                  rcv_proc,1)
1610            end if
1611         end if
1612
1613*        **** send packed array to other processors ****
1614         if (int_mb(channel_type(1,1)+c-1) .eq. 0) then
1615            pto=int_mb(channel_proc(1,1)+c-1)
1616            it = mod((pto-taskid+np),np)
1617
1618            msglen    = (int_mb(i1_start(1,nbb)+it+2-1)
1619     >                - int_mb(i1_start(1,nbb)+it+1-1))
1620
1621            if (msglen.gt.0) then
1622               call SND(9+MSGDBL,
1623     >                  tmp1(int_mb(i1_start(1,nbb)+it+1-1)),
1624     >                  mdtob(2*msglen),
1625     >                  Parallel2d_convert_taskid_i(pto),
1626     >                  1)
1627            end if
1628         end if
1629
1630      end do
1631
1632
1633      return
1634      end
1635
1636
1637*     ***********************************
1638*     *					*
1639*     *	   D3dB_c_ptranspose2_jk_end	*
1640*     *					*
1641*     ***********************************
1642
1643*
1644*      This routine performs the operation
1645*               A(i,k,j) <- A(i,j,k)
1646*
1647*      np = the number of worker nodes
1648*      proc#=0...(np-1)
1649*
1650*       this transpose uses more buffer space
1651*       then transpose2
1652*
1653
1654      subroutine D3dB_c_ptranspose2_jk_end(nbb,A,tmp2,request,reqcnt)
1655
1656      implicit none
1657      integer nbb
1658      complex*16  A(*)
1659      complex*16  tmp2(*)
1660      integer     request(*),reqcnt
1661
1662#include "bafdecls.fh"
1663#include "errquit.fh"
1664#include "D3dB.fh"
1665
1666
1667*     **** indexing variables ****
1668      integer iq_to_i1(2,0:3)
1669      integer iq_to_i2(2,0:3)
1670      integer iz_to_i2(2,0:3)
1671      integer i1_start(2,0:3)
1672      integer i2_start(2,0:3)
1673      common / ptrans_blk2 / iq_to_i1,iq_to_i2,iz_to_i2,
1674     >                       i1_start,i2_start
1675
1676
1677*     **** local variables ***
1678      integer np,n2,id
1679
1680      id = 2*(nbb/2) + 1
1681
1682      call Parallel2d_np_i(np)
1683
1684*     **** unpack A(i) array ****
1685      n2 = int_mb(i2_start(1,nbb)+np) - 1
1686      call D3dB_pfft_index2_copy(n2,int_mb(iq_to_i2(1,nbb)),tmp2,A)
1687      call D3dB_pfft_index2_zero(nfft3d(id)-n2,
1688     >                           int_mb(iz_to_i2(1,nbb)),A)
1689
1690      return
1691      end
1692
1693
1694
1695*     ***********************************
1696*     *					*
1697*     *	   D3dB_c_ptranspose_ijk_start	*
1698*     *					*
1699*     ***********************************
1700*
1701*      This routine performs the operation
1702*               A(i,k,j) <- A(i,j,k)
1703*
1704*      np = the number of worker nodes
1705*      proc#=0...(np-1)
1706*
1707*       this transpose uses more buffer space
1708*       then transpose2
1709*
1710
1711
1712      subroutine D3dB_c_ptranspose_ijk_start(nbb,op,A,tmp1,tmp2,
1713     >                                       request,reqcnt,msgtype)
1714
1715      implicit none
1716      integer nbb,op
1717      complex*16  A(*)
1718      complex*16  tmp1(*),tmp2(*)
1719      integer     request(*),reqcnt,msgtype
1720
1721#include "bafdecls.fh"
1722#include "errquit.fh"
1723#include "D3dB.fh"
1724
1725
1726*     **** indexing variables ****
1727      integer h_iq_to_i1(2,6,0:3)
1728      integer h_iq_to_i2(2,6,0:3)
1729      integer h_iz_to_i2(2,6,0:3)
1730      integer h_iz_to_i2_count(6,0:3)
1731      integer h_i1_start(2,6,0:3)
1732      integer h_i2_start(2,6,0:3)
1733      common / ptrans_blk_ijk / h_iq_to_i1,
1734     >                         h_iq_to_i2,
1735     >                         h_iz_to_i2,
1736     >                         h_iz_to_i2_count,
1737     >                         h_i1_start,
1738     >                         h_i2_start
1739
1740*     **** Used to avoid asynchronous communications ****
1741      integer Nchannels(NBLOCKS)
1742      integer channel_proc(2,NBLOCKS)
1743      integer channel_type(2,NBLOCKS)
1744      common / channel_blk / channel_proc,channel_type,Nchannels
1745
1746#include "tcgmsg.fh"
1747#include "msgtypesf.h"
1748      integer  rcv_len,rcv_proc
1749
1750
1751*     **** local variables ***
1752      integer c,n1
1753      integer it
1754      integer source
1755      integer msglen
1756      integer pfrom,pto
1757      integer taskid,np
1758
1759*     **** external functions ****
1760      integer  Parallel2d_convert_taskid_i
1761      external Parallel2d_convert_taskid_i
1762
1763
1764      call Parallel2d_taskid_i(taskid)
1765      call Parallel2d_np_i(np)
1766
1767*     **** pack A(i) array ****
1768      n1 = int_mb(h_i1_start(1,op,nbb)+np) - 1
1769      call D3dB_pfft_index1_copy(n1,int_mb(h_iq_to_i1(1,op,nbb)),A,tmp1)
1770
1771
1772*     **** it = 0, transpose data on same thread ****
1773      msglen = int_mb(h_i2_start(1,op,nbb)+2-1)
1774     >       - int_mb(h_i2_start(1,op,nbb)+1-1)
1775      call dcopy(2*msglen,
1776     >           tmp1(int_mb(h_i1_start(1,op,nbb))),1,
1777     >           tmp2(int_mb(h_i2_start(1,op,nbb))),1)
1778
1779
1780
1781      do c=1,Nchannels(1)
1782*        **** receive packed array data ****
1783         if (int_mb(channel_type(1,1)+c-1) .eq. 1) then
1784            pfrom=int_mb(channel_proc(1,1)+c-1)
1785            it = mod((taskid+np-pfrom),np)
1786
1787            source=pfrom
1788            msglen = (int_mb(h_i2_start(1,op,nbb)+it+2-1)
1789     >             -  int_mb(h_i2_start(1,op,nbb)+it+1-1))
1790
1791            if (msglen.gt.0) then
1792               call RCV(9+MSGDBL,
1793     >                  tmp2(int_mb(h_i2_start(1,op,nbb)+it+1-1)),
1794     >                  mdtob(2*msglen),rcv_len,
1795     >                  Parallel2d_convert_taskid_i(source),
1796     >                  rcv_proc,1)
1797            end if
1798         end if
1799
1800*        **** send packed array to other processors ****
1801         if (int_mb(channel_type(1,1)+c-1) .eq. 0) then
1802            pto=int_mb(channel_proc(1,1)+c-1)
1803            it = mod((pto-taskid+np),np)
1804
1805            msglen    = (int_mb(h_i1_start(1,op,nbb)+it+2-1)
1806     >                -  int_mb(h_i1_start(1,op,nbb)+it+1-1))
1807
1808            if (msglen.gt.0) then
1809               call SND(9+MSGDBL,
1810     >                  tmp1(int_mb(h_i1_start(1,op,nbb)+it+1-1)),
1811     >                  mdtob(2*msglen),
1812     >                  Parallel2d_convert_taskid_i(pto),
1813     >                  1)
1814            end if
1815         end if
1816
1817      end do
1818
1819      return
1820      end
1821
1822
1823
1824*     ***********************************
1825*     *					*
1826*     *	   D3dB_c_ptranspose_ijk_end	*
1827*     *					*
1828*     ***********************************
1829*
1830*      This routine performs the operation
1831*               A(i,k,j) <- A(i,j,k)
1832*
1833*      np = the number of worker nodes
1834*      proc#=0...(np-1)
1835*
1836*       this transpose uses more buffer space
1837*       then transpose2
1838*
1839
1840      subroutine D3dB_c_ptranspose_ijk_end(nbb,op,A,tmp2,
1841     >                                 request,reqcnt)
1842
1843      implicit none
1844      integer nbb,op
1845      complex*16  A(*)
1846      complex*16  tmp2(*)
1847      integer request(*),reqcnt
1848
1849#include "bafdecls.fh"
1850#include "errquit.fh"
1851#include "D3dB.fh"
1852
1853
1854*     **** indexing variables ****
1855      integer h_iq_to_i1(2,6,0:3)
1856      integer h_iq_to_i2(2,6,0:3)
1857      integer h_iz_to_i2(2,6,0:3)
1858      integer h_iz_to_i2_count(6,0:3)
1859      integer h_i1_start(2,6,0:3)
1860      integer h_i2_start(2,6,0:3)
1861      common / ptrans_blk_ijk / h_iq_to_i1,
1862     >                         h_iq_to_i2,
1863     >                         h_iz_to_i2,
1864     >                         h_iz_to_i2_count,
1865     >                         h_i1_start,
1866     >                         h_i2_start
1867
1868*     **** local variables ***
1869      integer n2,n3,np
1870
1871
1872      call Parallel2d_np_i(np)
1873
1874*     **** unpack A(i) array ****
1875      n2 = int_mb(h_i2_start(1,op,nbb)+np) - 1
1876      n3 = h_iz_to_i2_count(op,nbb)
1877      call D3dB_pfft_index2_copy(n2,int_mb(h_iq_to_i2(1,op,nbb)),tmp2,A)
1878      call D3dB_pfft_index2_zero(n3,int_mb(h_iz_to_i2(1,op,nbb)),A)
1879
1880
1881      return
1882      end
1883
1884
1885
1886*     ***********************************
1887*     *					*
1888*     *	   D3dB_c_ptranspose_ijk	*
1889*     *					*
1890*     ***********************************
1891
1892      subroutine D3dB_c_ptranspose_ijk(nbb,op,A,tmp1,tmp2)
1893
1894*****************************************************
1895*                                                   *
1896*      This routine performs the operation          *
1897*               A(i,k,j) <- A(i,j,k)                *
1898*                                                   *
1899*      np = the number of worker nodes              *
1900*      proc#=0...(np-1)
1901*                                                   *
1902*       this transpose uses more buffer space       *
1903*       then transpose2                             *
1904*****************************************************
1905      implicit none
1906      integer nbb,op
1907      complex*16  A(*)
1908      complex*16  tmp1(*),tmp2(*)
1909
1910#include "bafdecls.fh"
1911#include "errquit.fh"
1912#include "D3dB.fh"
1913
1914
1915*     **** indexing variables ****
1916      integer h_iq_to_i1(2,6,0:3)
1917      integer h_iq_to_i2(2,6,0:3)
1918      integer h_iz_to_i2(2,6,0:3)
1919      integer h_iz_to_i2_count(6,0:3)
1920      integer h_i1_start(2,6,0:3)
1921      integer h_i2_start(2,6,0:3)
1922      common / ptrans_blk_ijk / h_iq_to_i1,
1923     >                         h_iq_to_i2,
1924     >                         h_iz_to_i2,
1925     >                         h_iz_to_i2_count,
1926     >                         h_i1_start,
1927     >                         h_i2_start
1928
1929*     **** Used to avoid asynchronous communications ****
1930      integer Nchannels(NBLOCKS)
1931      integer channel_proc(2,NBLOCKS)
1932      integer channel_type(2,NBLOCKS)
1933      common / channel_blk / channel_proc,channel_type,Nchannels
1934
1935#include "tcgmsg.fh"
1936#include "msgtypesf.h"
1937      integer  rcv_len,rcv_proc
1938
1939
1940*     **** local variables ***
1941      integer c,n1,n2,n3
1942      integer it
1943      integer source
1944      integer msglen
1945      integer pfrom,pto
1946      integer taskid,np
1947
1948*     **** external functions ****
1949      integer  Parallel2d_convert_taskid_i
1950      external Parallel2d_convert_taskid_i
1951
1952
1953      call Parallel2d_taskid_i(taskid)
1954      call Parallel2d_np_i(np)
1955
1956      n1 = int_mb(h_i1_start(1,op,nbb)+np) - 1
1957      n2 = int_mb(h_i2_start(1,op,nbb)+np) - 1
1958      n3 = h_iz_to_i2_count(op,nbb)
1959
1960
1961*     **** pack A(i) array ****
1962      call D3dB_pfft_index1_copy(n1,int_mb(h_iq_to_i1(1,op,nbb)),A,tmp1)
1963
1964
1965*     **** it = 0, transpose data on same thread ****
1966      msglen = int_mb(h_i2_start(1,op,nbb)+2-1)
1967     >       - int_mb(h_i2_start(1,op,nbb)+1-1)
1968      call dcopy(2*msglen,
1969     >           tmp1(int_mb(h_i1_start(1,op,nbb))),1,
1970     >           tmp2(int_mb(h_i2_start(1,op,nbb))),1)
1971
1972
1973      do c=1,Nchannels(1)
1974*        **** receive packed array data ****
1975         if (int_mb(channel_type(1,1)+c-1) .eq. 1) then
1976            pfrom=int_mb(channel_proc(1,1)+c-1)
1977            it = mod((taskid+np-pfrom),np)
1978
1979            source=pfrom
1980            msglen = (int_mb(h_i2_start(1,op,nbb)+it+2-1)
1981     >             -  int_mb(h_i2_start(1,op,nbb)+it+1-1))
1982
1983            if (msglen.gt.0) then
1984               call RCV(9+MSGDBL,
1985     >                  tmp2(int_mb(h_i2_start(1,op,nbb)+it+1-1)),
1986     >                  mdtob(2*msglen),rcv_len,
1987     >                  Parallel2d_convert_taskid_i(source),
1988     >                  rcv_proc,1)
1989            end if
1990         end if
1991
1992*        **** send packed array to other processors ****
1993         if (int_mb(channel_type(1,1)+c-1) .eq. 0) then
1994            pto=int_mb(channel_proc(1,1)+c-1)
1995            it = mod((pto-taskid+np),np)
1996
1997            msglen    = (int_mb(h_i1_start(1,op,nbb)+it+2-1)
1998     >                -  int_mb(h_i1_start(1,op,nbb)+it+1-1))
1999
2000            if (msglen.gt.0) then
2001               call SND(9+MSGDBL,
2002     >                  tmp1(int_mb(h_i1_start(1,op,nbb)+it+1-1)),
2003     >                  mdtob(2*msglen),
2004     >                  Parallel2d_convert_taskid_i(pto),
2005     >                  1)
2006            end if
2007         end if
2008
2009      end do
2010
2011*     **** unpack A(i) array ****
2012      call D3dB_pfft_index2_copy(n2,int_mb(h_iq_to_i2(1,op,nbb)),tmp2,A)
2013      call D3dB_pfft_index2_zero(n3,int_mb(h_iz_to_i2(1,op,nbb)),A)
2014
2015
2016      return
2017      end
2018
2019
2020*     ***********************************
2021*     *                                 *
2022*     *      D3dB_channel_init          *
2023*     *                                 *
2024*     ***********************************
2025
2026      subroutine D3dB_channel_init(nb)
2027      implicit none
2028      integer nb
2029
2030#include "bafdecls.fh"
2031#include "errquit.fh"
2032#include "D3dB.fh"
2033
2034
2035*     **** Used to avoid asynchronous communications ****
2036      integer Nchannels(NBLOCKS)
2037      integer channel_proc(2,NBLOCKS)
2038      integer channel_type(2,NBLOCKS)
2039      common / channel_blk / channel_proc,channel_type,Nchannels
2040      integer pair1(2),pair2(2)
2041      integer pair_step(2),pair_tmp(2)
2042      integer step,Nstep,icount,jcount,jcount_max
2043
2044*     **** local variables ****
2045      integer np,taskid
2046      integer i,j,k
2047      logical value
2048
2049
2050      call Parallel2d_taskid_i(taskid)
2051      call Parallel2d_np_i(np)
2052*
2053*     **** Define Channels - which are used to avoid ****
2054*     **** asynchronous communications               ****
2055
2056      value = BA_alloc_get(mt_int,(2*np),
2057     >        'channel_proc',channel_proc(2,nb),channel_proc(1,nb))
2058      value = value.and.
2059     >        BA_alloc_get(mt_int,(2*np),
2060     >        'channel_type',channel_type(2,nb),channel_type(1,nb))
2061      if (.not. value)
2062     > call errquit('D3dB_channel_init:out of heap memory',0, MA_ERR)
2063
2064
2065
2066      value = BA_push_get(mt_int,(np*(np-1)/2),
2067     >                    'pair1',pair1(2),pair1(1))
2068      value = value.and.
2069     >        BA_push_get(mt_int,(np*(np-1)/2),
2070     >                    'pair2',pair2(2),pair2(1))
2071      value = value.and.
2072     >        BA_push_get(mt_int,(np*(np-1)/2),
2073     >                'pair_step',pair_step(2),pair_step(1))
2074      value = value.and.
2075     >        BA_push_get(mt_int,(np*(np-1)/2),
2076     >                'pair_tmp',pair_tmp(2),pair_tmp(1))
2077      if (.not. value)
2078     >  call errquit('D3dB_channel_init:out of stack memory',0, MA_ERR)
2079
2080*     *** define pair1,pair2 ****
2081      icount = 0
2082      do i=0,     (np-1)
2083      do j=(i+1), (np-1)
2084         icount = icount + 1
2085         int_mb(pair1(1)+icount-1) = i
2086         int_mb(pair2(1)+icount-1) = j
2087      end do
2088      end do
2089
2090*     **** define pair_step ****
2091      do i=1,(np*(np-1)/2)
2092         int_mb(pair_step(1)+i-1) = (-1)
2093      end do
2094
2095      step=0
2096      jcount = 0
2097      jcount_max = np*(np-1)/2
2098c      do while(.not. full_ps(int_mb(pair_step(1)),np))
2099      do while(jcount.lt.jcount_max)
2100         step=step+1
2101
2102         icount = 0
2103         do i=1, (np*(np-1)/2)
2104            if (int_mb(pair_step(1)+i-1).eq.(-1)) then
2105               value=.true.
2106               do k=1,icount
2107                 j = int_mb(pair_tmp(1)+k-1)
2108
2109                 if
2110     >        ((int_mb(pair1(1)+i-1).eq.int_mb(pair1(1)+j-1)).or.
2111     >         (int_mb(pair1(1)+i-1).eq.int_mb(pair2(1)+j-1)).or.
2112     >         (int_mb(pair2(1)+i-1).eq.int_mb(pair1(1)+j-1)).or.
2113     >         (int_mb(pair2(1)+i-1).eq.int_mb(pair2(1)+j-1)))
2114     >           value=.false.
2115
2116               end do
2117               if (value) then
2118                 int_mb(pair_step(1)+i-1) = step
2119                 icount = icount + 1
2120                 jcount = jcount + 1
2121                 int_mb(pair_tmp(1)+icount-1) = i
2122               end if
2123            end if
2124         end do
2125      end do
2126      Nstep=step
2127
2128
2129*     **** define channels ***
2130      Nchannels(nb)=0
2131      do step=1,Nstep
2132         do i=1,(np*(np-1)/2)
2133            if (int_mb(pair_step(1)+i-1).eq.step) then
2134*              **** send then recv ****
2135               if (int_mb(pair1(1)+i-1).eq.taskid) then
2136                  Nchannels(nb)=Nchannels(nb)+1
2137                  int_mb(channel_proc(1,nb)+Nchannels(nb)-1)
2138     >             = int_mb(pair2(1)+i-1)
2139                  int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 0
2140                  Nchannels(nb)=Nchannels(nb)+1
2141                  int_mb(channel_proc(1,nb)+Nchannels(nb)-1)
2142     >            = int_mb(pair2(1)+i-1)
2143                  int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 1
2144               end if
2145
2146*              **** recv then send ****
2147               if (int_mb(pair2(1)+i-1).eq.taskid) then
2148                  Nchannels(nb)=Nchannels(nb)+1
2149                  int_mb(channel_proc(1,nb)+Nchannels(nb)-1)
2150     >            = int_mb(pair1(1)+i-1)
2151                  int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 1
2152                  Nchannels(nb)=Nchannels(nb)+1
2153                  int_mb(channel_proc(1,nb)+Nchannels(nb)-1)
2154     >            = int_mb(pair1(1)+i-1)
2155                  int_mb(channel_type(1,nb)+Nchannels(nb)-1) = 0
2156               end if
2157            end if
2158         end do
2159      end do
2160
2161
2162      value=          BA_pop_stack(pair_tmp(2))
2163      value=value.and.BA_pop_stack(pair_step(2))
2164      value=value.and.BA_pop_stack(pair2(2))
2165      value=value.and.BA_pop_stack(pair1(2))
2166      if (.not. value)
2167     >  call errquit('D3dB_channel_init:popping stack',0, MA_ERR)
2168
2169      return
2170      end
2171
2172c      logical function full_ps(ps,np)
2173c      implicit none
2174c      integer ps(*)
2175c      integer np
2176c
2177c      integer i
2178c      logical value
2179c
2180c      value=.true.
2181c      do i=1,(np*(np-1)/2)
2182c         if (ps(i).eq.(-1)) value=.false.
2183c      end do
2184c
2185c      full_ps=value
2186c      return
2187c      end
2188
2189*     ***********************************
2190*     *					*
2191*     *	       D3dB_(c,r,t)_read 	*
2192*     *					*
2193*     ***********************************
2194
2195      subroutine D3dB_c_read_pio(nb,iunit,A,tmp,jcol)
2196      implicit none
2197      integer nb
2198      integer iunit
2199      complex*16 A(*)
2200      complex*16 tmp(*)
2201      integer    jcol
2202
2203#include "bafdecls.fh"
2204#include "errquit.fh"
2205
2206#include "D3dB.fh"
2207
2208#include "tcgmsg.fh"
2209#include "msgtypesf.h"
2210      integer rcv_len,rcv_proc
2211
2212
2213*     *** local variables ***
2214      logical value,fillcolumn,readcolumn
2215      integer MASTER,taskid,taskid_i
2216      parameter(MASTER=0)
2217      integer p_to, p_here,q
2218      integer index,k,j
2219      integer source,msglen
2220      integer tmp1(2),tmp2(2)
2221
2222      integer taskid_j,np_j
2223      integer ii,jj,jstart,jend
2224
2225*     **** external functions ****
2226      integer  Parallel2d_convert_taskid_ij
2227      external Parallel2d_convert_taskid_ij
2228
2229      call Parallel_taskid(taskid)
2230      call Parallel2d_taskid_i(taskid_i)
2231      call Parallel2d_taskid_j(taskid_j)
2232
2233c      call Parallel2d_np_j(np_j)
2234c      call Parallel2d_taskid_j(taskid_j)
2235      if (jcol.lt.0) then
2236c         jstart = 0
2237c         jend   = np_j-1
2238         fillcolumn = .true.
2239         readcolumn = .true.
2240      else
2241c         jstart = jcol
2242c         jend   = jcol
2243         fillcolumn = (taskid_j.eq.jcol)
2244         readcolumn = (taskid_j.eq.jcol)
2245      end if
2246
2247      if (readcolumn) then
2248
2249
2250      !**********************
2251      !**** slab mapping ****
2252      !**********************
2253      if (mapping.eq.1) then
2254*     **** master node reads from file and distributes ****
2255      if (taskid_i.eq.MASTER) then
2256         do k=1,nz(nb)
2257
2258            call dread(iunit,tmp,(nx(nb)+2)*ny(nb))
2259
2260            call D3dB_ktoqp(nb,k,q,ii)
2261            !do jj=jstart,jend
2262               p_to = Parallel2d_convert_taskid_ij(ii,taskid_j)
2263               !p_to = Parallel2d_convert_taskid_ij(ii,jj)
2264               if (ii.eq.MASTER) then
2265                  index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
2266                  call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1)
2267               else
2268                  msglen = (nx(nb)/2+1)*ny(nb)
2269                  call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1)
2270               end if
2271            !end do
2272         end do
2273
2274*     **** not master node ****
2275      else if (fillcolumn) then
2276         do k=1,nz(nb)
2277            call D3dB_ktoqp(nb,k,q,ii)
2278            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2279            if (p_here.eq.taskid) then
2280
2281               msglen = (nx(nb)/2+1)*ny(nb)
2282               !source = MASTER
2283               source = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2284
2285               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
2286     >                  source,rcv_proc,1)
2287
2288
2289               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
2290               call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1)
2291
2292            end if
2293         end do
2294      end if
2295
2296      !*************************
2297      !**** hilbert mapping ****
2298      !*************************
2299      else
2300
2301*     **** master node reads from file and distributes ****
2302      if (taskid_i.eq.MASTER) then
2303         do k=1,nz(nb)
2304         do j=1,ny(nb)
2305
2306            call dread(iunit,tmp,(nx(nb)+2))
2307
2308            q    = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2309            ii   = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2310            !do jj=jstart,jend
2311               p_to = Parallel2d_convert_taskid_ij(ii,taskid_j)
2312               !p_to = Parallel2d_convert_taskid_ij(ii,jj)
2313
2314               if (ii.eq.MASTER) then
2315                  index = (q-1)*(nx(nb)/2+1) + 1
2316                  call dcopy((nx(nb)+2),tmp,1,A(index),1)
2317               else
2318                  msglen = (nx(nb)/2+1)
2319                  call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1)
2320               end if
2321            !end do
2322         end do
2323         end do
2324
2325*     **** not master node ****
2326      else if (fillcolumn) then
2327         do k=1,nz(nb)
2328         do j=1,ny(nb)
2329
2330            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2331            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2332            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2333            if (p_here.eq.taskid) then
2334               msglen = (nx(nb)/2+1)
2335               !source = MASTER
2336               source = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2337               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
2338     >                  source,rcv_proc,1)
2339
2340               index = (q-1)*(nx(nb)/2+1) + 1
2341               call dcopy((nx(nb)+2),tmp,1,A(index),1)
2342            end if
2343         end do
2344         end do
2345      end if
2346
2347
2348*     **** allocate temporary space  ****
2349      value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
2350     >                    tmp1(2),tmp1(1))
2351      value = value.and.
2352     >      BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
2353      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
2354*
2355      call D3dB_c_transpose_ijk(nb,5,A,
2356     >                          dcpl_mb(tmp1(1)),
2357     >                          dcpl_mb(tmp2(1)))  !*** map1to3 operation ***
2358
2359*     **** deallocate temporary space  ****
2360      value = BA_pop_stack(tmp2(2))
2361      value = value.and.BA_pop_stack(tmp1(2))
2362      if (.not. value) call errquit('error popping stack',0, MA_ERR)
2363
2364      end if
2365
2366
2367      !*** shift filepointer by (nx(nb)+2)*ny(nb)*nz(nb) doubles ****
2368      else
2369         if (taskid_i.eq.MASTER)
2370     >      call dshift_fileptr(iunit,(nx(nb)+2)*ny(nb)*nz(nb))
2371      end if !*** readcolumn ***
2372
2373
2374
2375      return
2376      end
2377
2378
2379
2380      subroutine D3dB_r_read_pio(nb,iunit,A,tmp,jcol)
2381      implicit none
2382      integer nb
2383      integer iunit
2384      real*8  A(*)
2385      real*8  tmp(*)
2386      integer jcol
2387
2388#include "bafdecls.fh"
2389#include "D3dB.fh"
2390
2391#include "tcgmsg.fh"
2392#include "msgtypesf.h"
2393      integer rcv_len,rcv_proc
2394
2395
2396*     *** local variables ***
2397      logical fillcolumn,readcolumn
2398      integer MASTER,taskid,taskid_i
2399      parameter(MASTER=0)
2400      integer p_to, p_here,q
2401      integer j,k,index,index2
2402      integer source,msglen
2403
2404      integer taskid_j,np_j
2405      integer ii,jj,jstart,jend
2406
2407*     **** external functions ****
2408      integer  Parallel2d_convert_taskid_ij
2409      external Parallel2d_convert_taskid_ij
2410
2411      call Parallel_taskid(taskid)
2412      call Parallel2d_taskid_i(taskid_i)
2413      call Parallel2d_taskid_j(taskid_j)
2414
2415c      call Parallel2d_np_j(np_j)
2416      if (jcol.lt.0) then
2417c         jstart = 0
2418c         jend   = np_j-1
2419         fillcolumn = .true.
2420         readcolumn = .true.
2421      else
2422c         jstart = jcol
2423c         jend   = jcol
2424         fillcolumn = (taskid_j.eq.jcol)
2425         readcolumn = (taskid_j.eq.jcol)
2426      end if
2427
2428      if (readcolumn) then
2429
2430      !**********************
2431      !**** slab mapping ****
2432      !**********************
2433      if (mapping.eq.1) then
2434*     **** master node reads from file and distributes ****
2435      if (taskid_i.eq.MASTER) then
2436         do k=1,nz(nb)
2437
2438            call dread(iunit,tmp,(nx(nb))*ny(nb))
2439
2440            call D3dB_ktoqp(nb,k,q,ii)
2441            !do jj=jstart,jend
2442               p_to = Parallel2d_convert_taskid_ij(ii,taskid_j)
2443               if (p_to.eq.MASTER) then
2444                  do j=1,ny(nb)
2445                     index = (q-1)*(nx(nb)+2)*ny(nb)
2446     >                     + (j-1)*(nx(nb)+2) + 1
2447                     index2 = (j-1)*nx(nb) + 1
2448                     call dcopy(nx(nb),tmp(index2),1,A(index),1)
2449                     A(index+nx(nb)) = 0.0d0
2450                     A(index+nx(nb)+1) = 0.0d0
2451                  end do
2452               else
2453                  msglen = (nx(nb))*ny(nb)
2454                  call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
2455               end if
2456            !end do
2457         end do
2458
2459*     **** not master node ****
2460      else if (fillcolumn) then
2461         do k=1,nz(nb)
2462
2463            call D3dB_ktoqp(nb,k,q,ii)
2464            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2465            if (p_here.eq.taskid) then
2466
2467               msglen = (nx(nb))*ny(nb)
2468               !source  = MASTER
2469               source  = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2470
2471               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
2472     >                       source,rcv_proc,1)
2473               do j=1,ny(nb)
2474                  index = (q-1)*(nx(nb)+2)*ny(nb)
2475     >                  + (j-1)*(nx(nb)+2) + 1
2476                  index2 = (j-1)*nx(nb) + 1
2477                  call dcopy(nx(nb),tmp(index2),1,A(index),1)
2478                  A(index+nx(nb)) = 0.0d0
2479                  A(index+nx(nb)+1) = 0.0d0
2480               end do
2481            end if
2482         end do
2483      end if
2484
2485
2486
2487      !*************************
2488      !**** hilbert mapping ****
2489      !*************************
2490      else
2491*     **** master node reads from file and distributes ****
2492      if (taskid.eq.MASTER) then
2493         do k=1,nz(nb)
2494         do j=1,ny(nb)
2495
2496            call dread(iunit,tmp,(nx(nb)))
2497
2498            q    = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2499            ii   = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2500            !do jj=jstart,jend
2501               p_to = Parallel2d_convert_taskid_ij(ii,taskid_j)
2502               !p_to = Parallel2d_convert_taskid_ij(ii,jj)
2503               if (p_to.eq.MASTER) then
2504                  index = (q-1)*(nx(nb)+2) + 1
2505                  call dcopy((nx(nb)+2),tmp,1,A(index),1)
2506               else
2507                  msglen = (nx(nb)+2)
2508                  call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
2509               end if
2510            !end do
2511         end do
2512         end do
2513
2514*     **** not master node ****
2515      else if (fillcolumn) then
2516         do k=1,nz(nb)
2517         do j=1,ny(nb)
2518
2519            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2520            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2521            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2522            if (p_here.eq.taskid) then
2523
2524               msglen = (nx(nb)+2)
2525               !source  = MASTER
2526               source  = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2527
2528               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
2529     >                  source,rcv_proc,1)
2530
2531               index = (q-1)*(nx(nb)+2) + 1
2532               call dcopy((nx(nb)+2),tmp,1,A(index),1)
2533
2534            end if
2535         end do
2536         end do
2537       end if
2538
2539      end if
2540
2541      !*** shift filepointer by nx(nb)*ny(nb)*nz(nb) doubles ****
2542      else
2543         if (taskid_i.eq.MASTER)
2544     >      call dshift_fileptr(iunit,nx(nb)*ny(nb)*nz(nb))
2545      end if !*** readcolumn ***
2546
2547      return
2548      end
2549
2550
2551
2552
2553      subroutine D3dB_t_read_pio(nb,iunit,A,tmp,jcol)
2554      implicit none
2555      integer nb
2556      integer iunit
2557      real*8  A(*)
2558      real*8  tmp(*)
2559      integer jcol
2560
2561#include "bafdecls.fh"
2562#include "errquit.fh"
2563#include "D3dB.fh"
2564
2565
2566
2567#include "tcgmsg.fh"
2568#include "msgtypesf.h"
2569      integer rcv_len,rcv_proc
2570
2571
2572*     *** local variables ***
2573      integer MASTER,taskid,taskid_i
2574      parameter(MASTER=0)
2575
2576      logical value,fillcolumn,readcolumn
2577      integer p_to, p_here,q
2578      integer j,k,index
2579      integer source,msglen
2580      integer tmp1(2),tmp2(2)
2581
2582      integer taskid_j,np_j
2583      integer ii,jj,jstart,jend
2584
2585*     **** external functions ****
2586      integer  Parallel2d_convert_taskid_ij
2587      external Parallel2d_convert_taskid_ij
2588
2589      call Parallel_taskid(taskid)
2590      call Parallel2d_taskid_i(taskid_i)
2591      call Parallel2d_taskid_j(taskid_j)
2592
2593c      call Parallel2d_np_j(np_j)
2594      if (jcol.lt.0) then
2595c         jstart = 0
2596c         jend   = np_j-1
2597         fillcolumn = .true.
2598         readcolumn = .true.
2599      else
2600c         jstart = jcol
2601c         jend   = jcol
2602         fillcolumn = (taskid_j.eq.jcol)
2603         readcolumn = (taskid_j.eq.jcol)
2604      end if
2605
2606      if (readcolumn) then
2607
2608
2609      if (mapping.eq.1) then
2610*        **** master node reads from file and distributes ****
2611         if (taskid_i.eq.MASTER) then
2612         do k=1,nz(nb)
2613
2614            call dread(iunit,tmp,(nx(nb)/2+1)*ny(nb))
2615
2616            call D3dB_ktoqp(nb,k,q,ii)
2617            !do jj=jstart,jend
2618
2619              p_to = Parallel2d_convert_taskid_ij(ii,taskid_j)
2620              !p_to = Parallel2d_convert_taskid_ij(ii,jj)
2621              if (p_to.eq.MASTER) then
2622                 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
2623                 call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1)
2624              else
2625                 msglen = (nx(nb)/2+1)*ny(nb)
2626                 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
2627              end if
2628            !end do
2629         end do
2630
2631*        **** not master node ****
2632         else if (fillcolumn) then
2633
2634         do k=1,nz(nb)
2635            call D3dB_ktoqp(nb,k,q,ii)
2636            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2637            if (p_here.eq.taskid) then
2638
2639               msglen = (nx(nb)/2+1)*ny(nb)
2640               !source  = MASTER
2641               source = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2642
2643               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
2644     >                       source,rcv_proc,1)
2645
2646
2647               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
2648               call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1)
2649
2650            end if
2651         end do
2652         end if
2653
2654
2655      !*************************
2656      !**** hilbert mapping ****
2657      !*************************
2658      else
2659*     **** master node reads from file and distributes ****
2660      if (taskid_i.eq.MASTER) then
2661         do k=1,nz(nb)
2662         do j=1,ny(nb)
2663
2664            call dread(iunit,tmp,(nx(nb)/2+1))
2665
2666            q   = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2667            ii  = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2668            !do jj=jstart,jend
2669
2670               p_to = Parallel2d_convert_taskid_ij(ii,taskid_j)
2671               !p_to = Parallel2d_convert_taskid_ij(ii,jj)
2672               if (ii.eq.MASTER) then
2673                  index = (q-1)*(nx(nb)/2+1) + 1
2674                  call dcopy((nx(nb)/2+1),tmp,1,A(index),1)
2675               else
2676                  msglen = (nx(nb)/2+1)
2677                  call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
2678               end if
2679            !end do
2680         end do
2681         end do
2682
2683*     **** not master node ****
2684      else if (fillcolumn) then
2685         do k=1,nz(nb)
2686         do j=1,ny(nb)
2687
2688            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2689            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2690            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2691            if (p_here.eq.taskid) then
2692
2693               msglen = (nx(nb)/2+1)
2694               !source  = MASTER
2695               source = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2696
2697               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
2698     >                  source,rcv_proc,1)
2699
2700
2701               index = (q-1)*(nx(nb)/2+1) + 1
2702               call dcopy((nx(nb)/2+1),tmp,1,A(index),1)
2703
2704            end if
2705         end do
2706         end do
2707      end if
2708
2709*     **** allocate temporary space  ****
2710      value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
2711     >                    tmp1(2),tmp1(1))
2712      value = value.and.
2713     >      BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
2714      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
2715*
2716      call D3dB_t_transpose_ijk(nb,5,A,
2717     >                          dcpl_mb(tmp1(1)),
2718     >                          dcpl_mb(tmp2(1)))  !*** map1to3 operation ***
2719
2720*     **** deallocate temporary space  ****
2721      value =           BA_pop_stack(tmp2(2))
2722      value = value.and.BA_pop_stack(tmp1(2))
2723      if (.not. value) call errquit('error popping stack',0, MA_ERR)
2724
2725
2726
2727      end if
2728
2729
2730      !*** shift filepointer by (nx(nb)/2+1)*ny(nb)*nz(nb) doubles ****
2731      else
2732         if (taskid_i.eq.MASTER)
2733     >      call dshift_fileptr(iunit,(nx(nb)/2+1)*ny(nb)*nz(nb))
2734      end if !*** readcolumn ***
2735
2736      return
2737      end
2738
2739
2740
2741
2742*     ***********************************
2743*     *					*
2744*     *	       D3dB_(c,r,t)_write	*
2745*     *					*
2746*     ***********************************
2747
2748      subroutine D3dB_c_write_pio(nb,iunit,A,tmp,jcol)
2749      implicit none
2750      integer nb
2751      integer iunit
2752      complex*16 A(*)
2753      complex*16 tmp(*)
2754      integer    jcol
2755
2756
2757#include "bafdecls.fh"
2758#include "errquit.fh"
2759#include "D3dB.fh"
2760
2761
2762#include "tcgmsg.fh"
2763#include "msgtypesf.h"
2764      integer rcv_len,rcv_proc
2765
2766
2767*     *** local variables ***
2768      logical writecolumn
2769      integer MASTER,taskid,taskid_i
2770      parameter(MASTER=0)
2771      logical value
2772      integer p_from, p_here,q
2773      integer j,k,index
2774      integer dest,source,status,msglen
2775      integer dum,dum_msglen
2776      integer tmp1(2),tmp2(2)
2777
2778      integer ii,taskid_j
2779
2780*     **** external functions ****
2781      integer  Parallel2d_convert_taskid_ij
2782      external Parallel2d_convert_taskid_ij
2783
2784      call Parallel_taskid(taskid)
2785      call Parallel2d_taskid_i(taskid_i)
2786      call Parallel2d_taskid_j(taskid_j)
2787      writecolumn = (taskid_j.eq.jcol)
2788
2789      if (writecolumn) then
2790
2791      !**********************
2792      !**** slab mapping ****
2793      !**********************
2794      if (mapping.eq.1) then
2795*     **** master node reads from file and distributes ****
2796      if (taskid_i.eq.MASTER) then
2797         do k=1,nz(nb)
2798
2799            call D3dB_ktoqp(nb,k,q,ii)
2800
2801            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
2802            !p_from = Parallel2d_convert_taskid_ij(ii,jcol)
2803            if (ii.eq.MASTER) then
2804
2805               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
2806               call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1)
2807
2808            else
2809
2810               msglen  = (nx(nb)/2+1)*ny(nb)
2811               status  = msglen
2812               source  = p_from
2813
2814               dum = 99
2815               dum_msglen = 1
2816               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
2817               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
2818     >                       source,rcv_proc,1)
2819
2820            end if
2821
2822            call dwrite(iunit,tmp,(nx(nb)+2)*ny(nb))
2823         end do
2824
2825*     **** not master node ****
2826      else
2827         do k=1,nz(nb)
2828
2829            call D3dB_ktoqp(nb,k,q,ii)
2830            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2831            if (p_here.eq.taskid) then
2832
2833               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
2834               call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1)
2835
2836
2837               msglen = (nx(nb)/2+1)*ny(nb)
2838               !dest   = MASTER
2839               dest   = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2840
2841               dum_msglen = 1
2842               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
2843     >                       dest,rcv_proc,1)
2844               call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1)
2845
2846            end if
2847
2848         end do
2849      end if
2850
2851
2852      !*************************
2853      !**** hilbert mapping ****
2854      !*************************
2855      else
2856
2857      if (taskid_j.eq.jcol) then
2858
2859*       **** allocate temporary space  ****
2860        value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
2861     >                      tmp1(2),tmp1(1))
2862        value = value.and.
2863     >          BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
2864        if (.not. value) call errquit('out of stack memory',0, MA_ERR)
2865*
2866        call D3dB_c_transpose_ijk(nb,6,A,
2867     >                            dcpl_mb(tmp1(1)),
2868     >                            dcpl_mb(tmp2(1)))  !*** map3to1 operation ***
2869
2870*       **** deallocate temporary space  ****
2871        value = BA_pop_stack(tmp2(2))
2872        value = value.and.BA_pop_stack(tmp1(2))
2873        if (.not. value) call errquit('error popping stack',0, MA_ERR)
2874
2875      end if
2876
2877
2878*     **** master node reads from file and distributes ****
2879      if (taskid_i.eq.MASTER) then
2880         do k=1,nz(nb)
2881         do j=1,ny(nb)
2882
2883            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2884            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2885            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
2886            !p_from = Parallel2d_convert_taskid_ij(ii,jcol)
2887            if (ii.eq.MASTER) then
2888
2889               index = (q-1)*(nx(nb)/2+1) + 1
2890               call dcopy((nx(nb)+2),A(index),1,tmp,1)
2891
2892            else
2893
2894               msglen  = (nx(nb)/2+1)
2895               status  = msglen
2896               source  = p_from
2897
2898               dum = 99
2899               dum_msglen = 1
2900               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
2901               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
2902     >                       source,rcv_proc,1)
2903
2904            end if
2905
2906            call dwrite(iunit,tmp,(nx(nb)+2))
2907         end do
2908         end do
2909
2910*     **** not master node ****
2911      else
2912         do k=1,nz(nb)
2913         do j=1,ny(nb)
2914
2915            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2916            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
2917            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
2918            if (p_here.eq.taskid) then
2919
2920               index = (q-1)*(nx(nb)/2+1) + 1
2921               call dcopy((nx(nb)+2),A(index),1,tmp,1)
2922
2923
2924               msglen = (nx(nb)/2+1)
2925               !dest   = MASTER
2926               dest   = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
2927
2928               dum_msglen = 1
2929               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
2930     >                       dest,rcv_proc,1)
2931               call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1)
2932
2933            end if
2934
2935         end do
2936         end do
2937      end if
2938
2939      end if
2940
2941
2942      !*** shift filepointer by (nx(nb)+2)*ny(nb)*nz(nb) doubles ****
2943      else
2944         if (taskid_i.eq.MASTER)
2945     >      call dshift_fileptr(iunit,(nx(nb)+2)*ny(nb)*nz(nb))
2946      end if !*** writecolumn ***
2947
2948
2949      return
2950      end
2951
2952      subroutine D3dB_r_write_pio(nb,iunit,A,tmp,jcol)
2953      implicit none
2954      integer nb
2955      integer iunit
2956      real*8     A(*)
2957      real*8     tmp(*)
2958      integer    jcol
2959
2960#include "bafdecls.fh"
2961#include "D3dB.fh"
2962
2963
2964
2965#include "tcgmsg.fh"
2966#include "msgtypesf.h"
2967      integer rcv_len,rcv_proc
2968
2969
2970*     *** local variables ***
2971      logical writecolumn
2972      integer MASTER,taskid,taskid_i
2973      parameter(MASTER=0)
2974      integer p_from, p_here,q
2975      integer j,k,index,index2
2976      integer dest,source,status,msglen
2977
2978      integer taskid_j,ii
2979
2980*     **** external functions ****
2981      integer  Parallel2d_convert_taskid_ij
2982      external Parallel2d_convert_taskid_ij
2983
2984      call Parallel_taskid(taskid)
2985      call Parallel2d_taskid_i(taskid_i)
2986      call Parallel2d_taskid_j(taskid_j)
2987      writecolumn = (taskid_j.eq.jcol)
2988
2989      if (writecolumn) then
2990
2991      !**********************
2992      !**** slab mapping ****
2993      !**********************
2994      if (mapping.eq.1) then
2995*     **** master node reads from file and distributes ****
2996      if (taskid_i.eq.MASTER) then
2997         do k=1,nz(nb)
2998
2999            call D3dB_ktoqp(nb,k,q,ii)
3000            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
3001            if (ii.eq.MASTER) then
3002
3003               do j=1,ny(nb)
3004                 index = (q-1)*(nx(nb)+2)*ny(nb)
3005     >                 + (j-1)*(nx(nb)+2) + 1
3006                 index2 = (j-1)*nx(nb) + 1
3007                 call dcopy(nx(nb),A(index),1,tmp(index2),1)
3008               end do
3009
3010            else
3011
3012               msglen  = (nx(nb))*ny(nb)
3013               status  = msglen
3014               source  = p_from
3015
3016               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3017     >                       source,rcv_proc,1)
3018
3019            end if
3020
3021            call dwrite(iunit,tmp,(nx(nb))*ny(nb))
3022         end do
3023
3024*     **** not master node ****
3025      else
3026         do k=1,nz(nb)
3027
3028            call D3dB_ktoqp(nb,k,q,ii)
3029            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3030            !p_here = Parallel2d_convert_taskid_ij(ii,jcol)
3031            if (p_here.eq.taskid) then
3032
3033               do j=1,ny(nb)
3034                  index = (q-1)*(nx(nb)+2)*ny(nb)
3035     >                  + (j-1)*(nx(nb)+2) + 1
3036                  index2 = (j-1)*nx(nb) + 1
3037                  call dcopy(nx(nb),A(index),1,tmp(index2),1)
3038               end do
3039
3040
3041               msglen = (nx(nb))*ny(nb)
3042               !dest    = MASTER
3043               dest   = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
3044
3045               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
3046
3047            end if
3048
3049         end do
3050      end if
3051
3052      !*************************
3053      !**** hilbert mapping ****
3054      !*************************
3055      else
3056*     **** master node reads from file and distributes ****
3057      if (taskid_i.eq.MASTER) then
3058         do k=1,nz(nb)
3059         do j=1,ny(nb)
3060
3061            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3062            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3063            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
3064
3065            if (ii.eq.MASTER) then
3066              index = (q-1)*(nx(nb)+2) + 1
3067              call dcopy(nx(nb),A(index),1,tmp,1)
3068
3069            else
3070
3071               msglen  = (nx(nb))
3072               status  = msglen
3073               source  = p_from
3074
3075               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3076     >                       source,rcv_proc,1)
3077
3078            end if
3079
3080            call dwrite(iunit,tmp,(nx(nb)))
3081         end do
3082         end do
3083
3084*     **** not master node ****
3085      else
3086         do k=1,nz(nb)
3087         do j=1,ny(nb)
3088
3089            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3090            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3091            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3092            if (p_here.eq.taskid) then
3093
3094               index = (q-1)*(nx(nb)+2) + 1
3095               call dcopy(nx(nb),A(index),1,tmp,1)
3096
3097               msglen  = nx(nb)
3098               dest    = MASTER
3099
3100               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
3101
3102            end if
3103
3104         end do
3105         end do
3106      end if
3107
3108      end if
3109
3110      !*** shift filepointer by nx(nb)*ny(nb)*nz(nb) doubles ****
3111      else
3112         if (taskid_i.eq.MASTER)
3113     >      call dshift_fileptr(iunit,nx(nb)*ny(nb)*nz(nb))
3114      end if !*** writecolumn ***
3115
3116      return
3117      end
3118
3119      subroutine D3dB_t_write_pio(nb,iunit,A,tmp,jcol)
3120      implicit none
3121      integer nb
3122      integer iunit
3123      real*8  A(*)
3124      real*8  tmp(*)
3125      integer jcol
3126
3127#include "bafdecls.fh"
3128#include "errquit.fh"
3129#include "D3dB.fh"
3130#include "tcgmsg.fh"
3131#include "msgtypesf.h"
3132
3133      integer rcv_len,rcv_proc
3134
3135
3136*     *** local variables ***
3137      logical writecolumn
3138      integer MASTER,taskid,taskid_i
3139      parameter(MASTER=0)
3140      logical value
3141      integer p_from, p_here,q
3142      integer j,k,index
3143      integer dest,source,status,msglen
3144      integer dum,dum_msglen
3145      integer tmp1(2),tmp2(2)
3146
3147      integer ii,taskid_j
3148
3149*     **** external functions ****
3150      integer  Parallel2d_convert_taskid_ij
3151      external Parallel2d_convert_taskid_ij
3152
3153      call Parallel_taskid(taskid)
3154      call Parallel2d_taskid_i(taskid_i)
3155      call Parallel2d_taskid_j(taskid_j)
3156      writecolumn = (taskid_j.eq.jcol)
3157
3158      if (writecolumn) then
3159
3160      !**********************
3161      !**** slab mapping ****
3162      !**********************
3163      if (mapping.eq.1) then
3164*     **** master node reads from file and distributes ****
3165      if (taskid_i.eq.MASTER) then
3166         do k=1,nz(nb)
3167
3168            call D3dB_ktoqp(nb,k,q,ii)
3169
3170            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
3171            if (ii.eq.MASTER) then
3172
3173               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3174               call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1)
3175
3176            else
3177
3178               msglen  = (nx(nb)/2+1)*ny(nb)
3179               status  = msglen
3180               source  = p_from
3181
3182               dum = 99
3183               dum_msglen = 1
3184               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
3185               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3186     >                       source,rcv_proc,1)
3187
3188            end if
3189
3190            call dwrite(iunit,tmp,(nx(nb)/2+1)*ny(nb))
3191         end do
3192
3193*     **** not master node ****
3194      else
3195         do k=1,nz(nb)
3196
3197            call D3dB_ktoqp(nb,k,q,ii)
3198            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3199            if (p_here.eq.taskid) then
3200
3201               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3202               call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1)
3203
3204
3205               msglen = (nx(nb)/2+1)*ny(nb)
3206               !dest   = MASTER
3207               dest   = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
3208
3209               dum_msglen = 1
3210               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
3211     >                       dest,rcv_proc,1)
3212               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
3213
3214            end if
3215
3216         end do
3217      end if
3218
3219
3220      !*************************
3221      !**** hilbert mapping ****
3222      !*************************
3223      else
3224
3225      if (taskid_j.eq.jcol) then
3226
3227*       **** allocate temporary space  ****
3228        value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
3229     >                      tmp1(2),tmp1(1))
3230        value = value.and.
3231     >          BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
3232        if (.not. value) call errquit('out of stack memory',0, MA_ERR)
3233*
3234        call D3dB_t_transpose_ijk(nb,6,A,
3235     >                            dcpl_mb(tmp1(1)),
3236     >                            dcpl_mb(tmp2(1)))  !*** map3to1 operation ***
3237
3238*       **** deallocate temporary space  ****
3239        value = BA_pop_stack(tmp2(2))
3240        value = value.and.BA_pop_stack(tmp1(2))
3241        if (.not. value) call errquit('error popping stack',0, MA_ERR)
3242
3243      end if
3244
3245
3246*     **** master node reads from file and distributes ****
3247      if (taskid_i.eq.MASTER) then
3248         do k=1,nz(nb)
3249         do j=1,ny(nb)
3250
3251            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3252            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3253            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
3254            if (ii.eq.MASTER) then
3255
3256               index = (q-1)*(nx(nb)/2+1) + 1
3257               call dcopy((nx(nb)/2+1),A(index),1,tmp,1)
3258
3259            else
3260
3261               msglen  = (nx(nb)/2+1)
3262               status  = msglen
3263               source  = p_from
3264
3265               dum = 99
3266               dum_msglen = 1
3267               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
3268               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3269     >                       source,rcv_proc,1)
3270
3271            end if
3272
3273            call dwrite(iunit,tmp,(nx(nb)/2+1))
3274         end do
3275         end do
3276
3277*     **** not master node ****
3278      else
3279         do k=1,nz(nb)
3280         do j=1,ny(nb)
3281
3282            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3283            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3284            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3285            if (p_here.eq.taskid) then
3286
3287               index = (q-1)*(nx(nb)/2+1) + 1
3288               call dcopy((nx(nb)/2+1),A(index),1,tmp,1)
3289
3290
3291               msglen = (nx(nb)/2+1)
3292               !dest   = MASTER
3293               dest   = Parallel2d_convert_taskid_ij(MASTER,taskid_j)
3294
3295               dum_msglen = 1
3296               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
3297     >                       dest,rcv_proc,1)
3298               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
3299
3300            end if
3301
3302         end do
3303         end do
3304      end if
3305      end if
3306
3307      !*** shift filepointer by (nx(nb)/2+1)*ny(nb)*nz(nb) doubles ****
3308      else
3309         if (taskid_i.eq.MASTER)
3310     >      call dshift_fileptr(iunit,(nx(nb)/2+1)*ny(nb)*nz(nb))
3311      end if !*** writecolumn ***
3312
3313
3314      return
3315      end
3316
3317
3318
3319
3320      subroutine D3dB_c_read(nb,iunit,A,tmp,jcol)
3321      implicit none
3322      integer nb
3323      integer iunit
3324      complex*16 A(*)
3325      complex*16 tmp(*)
3326      integer    jcol
3327
3328#include "bafdecls.fh"
3329#include "errquit.fh"
3330
3331#include "D3dB.fh"
3332
3333#include "tcgmsg.fh"
3334#include "msgtypesf.h"
3335      integer rcv_len,rcv_proc
3336
3337
3338*     *** local variables ***
3339      logical value,fillcolumn
3340      integer MASTER,taskid
3341      parameter(MASTER=0)
3342      integer p_to, p_here,q
3343      integer index,k,j
3344      integer source,msglen
3345      integer tmp1(2),tmp2(2)
3346
3347      integer taskid_j,np_j
3348      integer ii,jj,jstart,jend
3349
3350*     **** external functions ****
3351      integer  Parallel2d_convert_taskid_ij
3352      external Parallel2d_convert_taskid_ij
3353
3354      call Parallel_taskid(taskid)
3355      call Parallel2d_taskid_j(taskid_j)
3356      call Parallel2d_np_j(np_j)
3357      if (jcol.lt.0) then
3358         jstart = 0
3359         jend   = np_j-1
3360         fillcolumn = .true.
3361      else
3362         jstart = jcol
3363         jend   = jcol
3364         fillcolumn = (taskid_j.eq.jcol)
3365      end if
3366
3367
3368
3369      !**********************
3370      !**** slab mapping ****
3371      !**********************
3372      if (mapping.eq.1) then
3373*     **** master node reads from file and distributes ****
3374      if (taskid.eq.MASTER) then
3375         do k=1,nz(nb)
3376
3377            call dread(iunit,tmp,(nx(nb)+2)*ny(nb))
3378
3379            call D3dB_ktoqp(nb,k,q,ii)
3380            do jj=jstart,jend
3381               p_to = Parallel2d_convert_taskid_ij(ii,jj)
3382               if (p_to.eq.MASTER) then
3383                  index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3384                  call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1)
3385               else
3386                  msglen = (nx(nb)/2+1)*ny(nb)
3387                  call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1)
3388               end if
3389            end do
3390         end do
3391
3392*     **** not master node ****
3393      else if (fillcolumn) then
3394         do k=1,nz(nb)
3395            call D3dB_ktoqp(nb,k,q,ii)
3396            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3397            if (p_here.eq.taskid) then
3398
3399               msglen = (nx(nb)/2+1)*ny(nb)
3400               source = MASTER
3401
3402               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
3403     >                  source,rcv_proc,1)
3404               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3405               call dcopy((nx(nb)+2)*ny(nb),tmp,1,A(index),1)
3406
3407            end if
3408         end do
3409      end if
3410
3411      !*************************
3412      !**** hilbert mapping ****
3413      !*************************
3414      else
3415
3416*     **** master node reads from file and distributes ****
3417      if (taskid.eq.MASTER) then
3418         do k=1,nz(nb)
3419         do j=1,ny(nb)
3420
3421            call dread(iunit,tmp,(nx(nb)+2))
3422
3423            q    = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3424            ii   = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3425            do jj=jstart,jend
3426               p_to = Parallel2d_convert_taskid_ij(ii,jj)
3427
3428               if (p_to.eq.MASTER) then
3429                  index = (q-1)*(nx(nb)/2+1) + 1
3430                  call dcopy((nx(nb)+2),tmp,1,A(index),1)
3431               else
3432                  msglen = (nx(nb)/2+1)
3433                  call SND(9+MSGDBL,tmp,mdtob(2*msglen),p_to,1)
3434               end if
3435            end do
3436         end do
3437         end do
3438
3439*     **** not master node ****
3440      else if (fillcolumn) then
3441         do k=1,nz(nb)
3442         do j=1,ny(nb)
3443
3444            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3445            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3446            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3447            if (p_here.eq.taskid) then
3448               msglen = (nx(nb)/2+1)
3449               source = MASTER
3450               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
3451     >                  source,rcv_proc,1)
3452
3453               index = (q-1)*(nx(nb)/2+1) + 1
3454               call dcopy((nx(nb)+2),tmp,1,A(index),1)
3455            end if
3456         end do
3457         end do
3458      end if
3459
3460
3461*     **** allocate temporary space  ****
3462      value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
3463     >                    tmp1(2),tmp1(1))
3464      value = value.and.
3465     >      BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
3466      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
3467*
3468      call D3dB_c_transpose_ijk(nb,5,A,
3469     >                          dcpl_mb(tmp1(1)),
3470     >                          dcpl_mb(tmp2(1)))  !*** map1to3 operation ***
3471
3472*     **** deallocate temporary space  ****
3473      value = BA_pop_stack(tmp2(2))
3474      value = value.and.BA_pop_stack(tmp1(2))
3475      if (.not. value) call errquit('error popping stack',0, MA_ERR)
3476
3477      end if
3478
3479
3480      return
3481      end
3482
3483
3484
3485      subroutine D3dB_r_read(nb,iunit,A,tmp,jcol)
3486      implicit none
3487      integer nb
3488      integer iunit
3489      real*8  A(*)
3490      real*8  tmp(*)
3491      integer jcol
3492
3493#include "bafdecls.fh"
3494#include "D3dB.fh"
3495
3496#include "tcgmsg.fh"
3497#include "msgtypesf.h"
3498      integer rcv_len,rcv_proc
3499
3500
3501*     *** local variables ***
3502      logical fillcolumn
3503      integer MASTER,taskid
3504      parameter(MASTER=0)
3505      integer p_to, p_here,q
3506      integer j,k,index,index2
3507      integer source,msglen
3508
3509      integer taskid_j,np_j
3510      integer ii,jj,jstart,jend
3511
3512*     **** external functions ****
3513      integer  Parallel2d_convert_taskid_ij
3514      external Parallel2d_convert_taskid_ij
3515
3516      call Parallel_taskid(taskid)
3517      call Parallel2d_taskid_j(taskid_j)
3518      call Parallel2d_np_j(np_j)
3519      if (jcol.lt.0) then
3520         jstart = 0
3521         jend   = np_j-1
3522         fillcolumn = .true.
3523      else
3524         jstart = jcol
3525         jend   = jcol
3526         fillcolumn = (taskid_j.eq.jcol)
3527      end if
3528
3529
3530      !**********************
3531      !**** slab mapping ****
3532      !**********************
3533      if (mapping.eq.1) then
3534*     **** master node reads from file and distributes ****
3535      if (taskid.eq.MASTER) then
3536         do k=1,nz(nb)
3537
3538            call dread(iunit,tmp,(nx(nb))*ny(nb))
3539
3540            call D3dB_ktoqp(nb,k,q,ii)
3541            do jj=jstart,jend
3542               p_to = Parallel2d_convert_taskid_ij(ii,jj)
3543               if (p_to.eq.MASTER) then
3544                  do j=1,ny(nb)
3545                     index = (q-1)*(nx(nb)+2)*ny(nb)
3546     >                     + (j-1)*(nx(nb)+2) + 1
3547                     index2 = (j-1)*nx(nb) + 1
3548                     call dcopy(nx(nb),tmp(index2),1,A(index),1)
3549                     A(index+nx(nb)) = 0.0d0
3550                     A(index+nx(nb)+1) = 0.0d0
3551                  end do
3552               else
3553                  msglen = (nx(nb))*ny(nb)
3554                  call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
3555               end if
3556            end do
3557         end do
3558
3559*     **** not master node ****
3560      else if (fillcolumn) then
3561         do k=1,nz(nb)
3562
3563            call D3dB_ktoqp(nb,k,q,ii)
3564            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3565            if (p_here.eq.taskid) then
3566
3567               msglen = (nx(nb))*ny(nb)
3568               source  = MASTER
3569
3570               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3571     >                       source,rcv_proc,1)
3572               do j=1,ny(nb)
3573                  index = (q-1)*(nx(nb)+2)*ny(nb)
3574     >                  + (j-1)*(nx(nb)+2) + 1
3575                  index2 = (j-1)*nx(nb) + 1
3576                  call dcopy(nx(nb),tmp(index2),1,A(index),1)
3577                  A(index+nx(nb)) = 0.0d0
3578                  A(index+nx(nb)+1) = 0.0d0
3579               end do
3580            end if
3581         end do
3582      end if
3583
3584
3585
3586      !*************************
3587      !**** hilbert mapping ****
3588      !*************************
3589      else
3590*     **** master node reads from file and distributes ****
3591      if (taskid.eq.MASTER) then
3592         do k=1,nz(nb)
3593         do j=1,ny(nb)
3594
3595            call dread(iunit,tmp,(nx(nb)))
3596
3597            q    = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3598            ii   = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3599            do jj=jstart,jend
3600               p_to = Parallel2d_convert_taskid_ij(ii,jj)
3601               if (p_to.eq.MASTER) then
3602                  index = (q-1)*(nx(nb)+2) + 1
3603                  call dcopy((nx(nb)+2),tmp,1,A(index),1)
3604               else
3605                  msglen = (nx(nb)+2)
3606                  call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
3607               end if
3608            end do
3609         end do
3610         end do
3611
3612*     **** not master node ****
3613      else if (fillcolumn) then
3614         do k=1,nz(nb)
3615         do j=1,ny(nb)
3616
3617            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3618            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3619            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3620            if (p_here.eq.taskid) then
3621
3622               msglen = (nx(nb)+2)
3623               source  = MASTER
3624
3625               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3626     >                  source,rcv_proc,1)
3627
3628               index = (q-1)*(nx(nb)+2) + 1
3629               call dcopy((nx(nb)+2),tmp,1,A(index),1)
3630
3631            end if
3632         end do
3633         end do
3634       end if
3635
3636      end if
3637
3638      return
3639      end
3640
3641
3642
3643      subroutine D3dB_t_read(nb,iunit,A,tmp,jcol)
3644      implicit none
3645      integer nb
3646      integer iunit
3647      real*8  A(*)
3648      real*8  tmp(*)
3649      integer jcol
3650
3651#include "bafdecls.fh"
3652#include "errquit.fh"
3653#include "D3dB.fh"
3654
3655
3656
3657#include "tcgmsg.fh"
3658#include "msgtypesf.h"
3659      integer rcv_len,rcv_proc
3660
3661
3662*     *** local variables ***
3663      integer MASTER,taskid
3664      parameter(MASTER=0)
3665
3666      logical value,fillcolumn
3667      integer p_to, p_here,q
3668      integer j,k,index
3669      integer source,msglen
3670      integer tmp1(2),tmp2(2)
3671
3672      integer taskid_j,np_j
3673      integer ii,jj,jstart,jend
3674
3675*     **** external functions ****
3676      integer  Parallel2d_convert_taskid_ij
3677      external Parallel2d_convert_taskid_ij
3678
3679      call Parallel_taskid(taskid)
3680      call Parallel2d_taskid_j(taskid_j)
3681      call Parallel2d_np_j(np_j)
3682      if (jcol.lt.0) then
3683         jstart = 0
3684         jend   = np_j-1
3685         fillcolumn = .true.
3686      else
3687         jstart = jcol
3688         jend   = jcol
3689         fillcolumn = (taskid_j.eq.jcol)
3690      end if
3691
3692
3693
3694      if (mapping.eq.1) then
3695*        **** master node reads from file and distributes ****
3696         if (taskid.eq.MASTER) then
3697         do k=1,nz(nb)
3698
3699            call dread(iunit,tmp,(nx(nb)/2+1)*ny(nb))
3700
3701            call D3dB_ktoqp(nb,k,q,ii)
3702            !do jj=jstart,jend
3703
3704              p_to = Parallel2d_convert_taskid_ij(ii,jj)
3705              if (p_to.eq.MASTER) then
3706                 index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3707                 call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1)
3708              else
3709                 msglen = (nx(nb)/2+1)*ny(nb)
3710                 call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
3711              end if
3712            !end do
3713         end do
3714
3715*        **** not master node ****
3716         else if (fillcolumn) then
3717
3718         do k=1,nz(nb)
3719            call D3dB_ktoqp(nb,k,q,ii)
3720            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3721            if (p_here.eq.taskid) then
3722
3723               msglen = (nx(nb)/2+1)*ny(nb)
3724               source  = MASTER
3725               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3726     >                       source,rcv_proc,1)
3727
3728
3729               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3730               call dcopy((nx(nb)/2+1)*ny(nb),tmp,1,A(index),1)
3731
3732            end if
3733         end do
3734         end if
3735
3736
3737      !*************************
3738      !**** hilbert mapping ****
3739      !*************************
3740      else
3741*     **** master node reads from file and distributes ****
3742      if (taskid.eq.MASTER) then
3743         do k=1,nz(nb)
3744         do j=1,ny(nb)
3745
3746            call dread(iunit,tmp,(nx(nb)/2+1))
3747
3748            q   = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3749            ii  = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3750            do jj=jstart,jend
3751
3752               p_to = Parallel2d_convert_taskid_ij(ii,jj)
3753               if (p_to.eq.MASTER) then
3754                  index = (q-1)*(nx(nb)/2+1) + 1
3755                  call dcopy((nx(nb)/2+1),tmp,1,A(index),1)
3756               else
3757                  msglen = (nx(nb)/2+1)
3758                  call SND(9+MSGDBL,tmp,mdtob(msglen),p_to,1)
3759               end if
3760            end do
3761         end do
3762         end do
3763
3764*     **** not master node ****
3765      else if (fillcolumn) then
3766         do k=1,nz(nb)
3767         do j=1,ny(nb)
3768
3769            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3770            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3771            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3772            if (p_here.eq.taskid) then
3773
3774               msglen = (nx(nb)/2+1)
3775               source  = MASTER
3776
3777               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
3778     >                  source,rcv_proc,1)
3779
3780               index = (q-1)*(nx(nb)/2+1) + 1
3781               call dcopy((nx(nb)/2+1),tmp,1,A(index),1)
3782
3783            end if
3784         end do
3785         end do
3786      end if
3787
3788*     **** allocate temporary space  ****
3789      value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
3790     >                    tmp1(2),tmp1(1))
3791      value = value.and.
3792     >      BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
3793      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
3794*
3795      call D3dB_t_transpose_ijk(nb,5,A,
3796     >                          dcpl_mb(tmp1(1)),
3797     >                          dcpl_mb(tmp2(1)))  !*** map1to3 operation ***
3798
3799*     **** deallocate temporary space  ****
3800      value =           BA_pop_stack(tmp2(2))
3801      value = value.and.BA_pop_stack(tmp1(2))
3802      if (.not. value) call errquit('error popping stack',0, MA_ERR)
3803
3804
3805
3806      end if
3807
3808
3809      return
3810      end
3811
3812
3813
3814
3815*     ***********************************
3816*     *					*
3817*     *	       D3dB_(c,r,t)_write	*
3818*     *					*
3819*     ***********************************
3820
3821      subroutine D3dB_c_write(nb,iunit,A,tmp,jcol)
3822      implicit none
3823      integer nb
3824      integer iunit
3825      complex*16 A(*)
3826      complex*16 tmp(*)
3827      integer    jcol
3828
3829
3830#include "bafdecls.fh"
3831#include "errquit.fh"
3832#include "D3dB.fh"
3833
3834
3835#include "tcgmsg.fh"
3836#include "msgtypesf.h"
3837      integer rcv_len,rcv_proc
3838
3839
3840*     *** local variables ***
3841      integer MASTER,taskid
3842      parameter(MASTER=0)
3843      logical value
3844      integer p_from, p_here,q
3845      integer j,k,index
3846      integer dest,source,status,msglen
3847      integer dum,dum_msglen
3848      integer tmp1(2),tmp2(2)
3849
3850      integer ii,taskid_j
3851
3852*     **** external functions ****
3853      integer  Parallel2d_convert_taskid_ij
3854      external Parallel2d_convert_taskid_ij
3855
3856      call Parallel_taskid(taskid)
3857      call Parallel2d_taskid_j(taskid_j)
3858
3859
3860      !**********************
3861      !**** slab mapping ****
3862      !**********************
3863      if (mapping.eq.1) then
3864*     **** master node reads from file and distributes ****
3865      if (taskid.eq.MASTER) then
3866         do k=1,nz(nb)
3867
3868            call D3dB_ktoqp(nb,k,q,ii)
3869
3870            p_from = Parallel2d_convert_taskid_ij(ii,jcol)
3871            if (p_from.eq.MASTER) then
3872
3873               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3874               call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1)
3875
3876            else
3877
3878               msglen  = (nx(nb)/2+1)*ny(nb)
3879               status  = msglen
3880               source  = p_from
3881
3882               dum = 99
3883               dum_msglen = 1
3884               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
3885               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
3886     >                       source,rcv_proc,1)
3887
3888            end if
3889
3890            call dwrite(iunit,tmp,(nx(nb)+2)*ny(nb))
3891         end do
3892
3893*     **** not master node ****
3894      else
3895         do k=1,nz(nb)
3896
3897            call D3dB_ktoqp(nb,k,q,ii)
3898            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3899            if (p_here.eq.taskid) then
3900
3901               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
3902               call dcopy((nx(nb)+2)*ny(nb),A(index),1,tmp,1)
3903
3904
3905               msglen = (nx(nb)/2+1)*ny(nb)
3906               dest   = MASTER
3907
3908               dum_msglen = 1
3909               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
3910     >                       dest,rcv_proc,1)
3911               call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1)
3912
3913            end if
3914
3915         end do
3916      end if
3917
3918
3919      !*************************
3920      !**** hilbert mapping ****
3921      !*************************
3922      else
3923
3924      if (taskid_j.eq.jcol) then
3925
3926*       **** allocate temporary space  ****
3927        value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
3928     >                      tmp1(2),tmp1(1))
3929        value = value.and.
3930     >          BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
3931        if (.not. value) call errquit('out of stack memory',0, MA_ERR)
3932*
3933        call D3dB_c_transpose_ijk(nb,6,A,
3934     >                            dcpl_mb(tmp1(1)),
3935     >                            dcpl_mb(tmp2(1)))  !*** map3to1 operation ***
3936
3937*       **** deallocate temporary space  ****
3938        value = BA_pop_stack(tmp2(2))
3939        value = value.and.BA_pop_stack(tmp1(2))
3940        if (.not. value) call errquit('error popping stack',0, MA_ERR)
3941
3942      end if
3943
3944
3945*     **** master node reads from file and distributes ****
3946      if (taskid.eq.MASTER) then
3947         do k=1,nz(nb)
3948         do j=1,ny(nb)
3949
3950            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3951            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3952            p_from = Parallel2d_convert_taskid_ij(ii,jcol)
3953            if (p_from.eq.MASTER) then
3954
3955               index = (q-1)*(nx(nb)/2+1) + 1
3956               call dcopy((nx(nb)+2),A(index),1,tmp,1)
3957
3958            else
3959
3960               msglen  = (nx(nb)/2+1)
3961               status  = msglen
3962               source  = p_from
3963
3964               dum = 99
3965               dum_msglen = 1
3966               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
3967               call RCV(9+MSGDBL,tmp,mdtob(2*msglen),rcv_len,
3968     >                       source,rcv_proc,1)
3969
3970            end if
3971
3972            call dwrite(iunit,tmp,(nx(nb)+2))
3973         end do
3974         end do
3975
3976*     **** not master node ****
3977      else
3978         do k=1,nz(nb)
3979         do j=1,ny(nb)
3980
3981            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3982            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
3983            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
3984            if (p_here.eq.taskid) then
3985
3986               index = (q-1)*(nx(nb)/2+1) + 1
3987               call dcopy((nx(nb)+2),A(index),1,tmp,1)
3988
3989               msglen = (nx(nb)/2+1)
3990               dest   = MASTER
3991
3992               dum_msglen = 1
3993               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
3994     >                       dest,rcv_proc,1)
3995               call SND(9+MSGDBL,tmp,mdtob(2*msglen),dest,1)
3996
3997            end if
3998
3999         end do
4000         end do
4001      end if
4002
4003      end if
4004
4005
4006
4007      return
4008      end
4009
4010      subroutine D3dB_r_write(nb,iunit,A,tmp,jcol)
4011      implicit none
4012      integer nb
4013      integer iunit
4014      real*8     A(*)
4015      real*8     tmp(*)
4016      integer    jcol
4017
4018#include "bafdecls.fh"
4019#include "D3dB.fh"
4020
4021#include "tcgmsg.fh"
4022#include "msgtypesf.h"
4023      integer rcv_len,rcv_proc
4024
4025
4026*     *** local variables ***
4027      integer MASTER,taskid
4028      parameter(MASTER=0)
4029      integer p_from, p_here,q
4030      integer j,k,index,index2
4031      integer dest,source,status,msglen
4032
4033      integer taskid_j,ii
4034
4035*     **** external functions ****
4036      integer  Parallel2d_convert_taskid_ij
4037      external Parallel2d_convert_taskid_ij
4038
4039      call Parallel_taskid(taskid)
4040      call Parallel2d_taskid_j(taskid_j)
4041
4042
4043      !**********************
4044      !**** slab mapping ****
4045      !**********************
4046      if (mapping.eq.1) then
4047*     **** master node reads from file and distributes ****
4048      if (taskid.eq.MASTER) then
4049         do k=1,nz(nb)
4050
4051            call D3dB_ktoqp(nb,k,q,ii)
4052            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
4053            if (p_from.eq.MASTER) then
4054
4055               do j=1,ny(nb)
4056                 index = (q-1)*(nx(nb)+2)*ny(nb)
4057     >                 + (j-1)*(nx(nb)+2) + 1
4058                 index2 = (j-1)*nx(nb) + 1
4059                 call dcopy(nx(nb),A(index),1,tmp(index2),1)
4060               end do
4061
4062            else
4063
4064               msglen  = (nx(nb))*ny(nb)
4065               status  = msglen
4066               source  = p_from
4067
4068               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4069     >                       source,rcv_proc,1)
4070
4071            end if
4072
4073            call dwrite(iunit,tmp,(nx(nb))*ny(nb))
4074         end do
4075
4076*     **** not master node ****
4077      else
4078         do k=1,nz(nb)
4079
4080            call D3dB_ktoqp(nb,k,q,ii)
4081            p_here = Parallel2d_convert_taskid_ij(ii,jcol)
4082            if (p_here.eq.taskid) then
4083
4084               do j=1,ny(nb)
4085                  index = (q-1)*(nx(nb)+2)*ny(nb)
4086     >                  + (j-1)*(nx(nb)+2) + 1
4087                  index2 = (j-1)*nx(nb) + 1
4088                  call dcopy(nx(nb),A(index),1,tmp(index2),1)
4089               end do
4090
4091
4092               msglen = (nx(nb))*ny(nb)
4093               dest    = MASTER
4094
4095               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4096
4097            end if
4098
4099         end do
4100      end if
4101
4102      !*************************
4103      !**** hilbert mapping ****
4104      !*************************
4105      else
4106*     **** master node reads from file and distributes ****
4107      if (taskid.eq.MASTER) then
4108         do k=1,nz(nb)
4109         do j=1,ny(nb)
4110
4111            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4112            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4113            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
4114
4115            if (p_from.eq.MASTER) then
4116              index = (q-1)*(nx(nb)+2) + 1
4117              call dcopy(nx(nb),A(index),1,tmp,1)
4118
4119            else
4120
4121               msglen  = (nx(nb))
4122               status  = msglen
4123               source  = p_from
4124
4125               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4126     >                       source,rcv_proc,1)
4127
4128            end if
4129
4130            call dwrite(iunit,tmp,(nx(nb)))
4131         end do
4132         end do
4133
4134*     **** not master node ****
4135      else
4136         do k=1,nz(nb)
4137         do j=1,ny(nb)
4138
4139            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4140            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4141            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
4142            if (p_here.eq.taskid) then
4143
4144               index = (q-1)*(nx(nb)+2) + 1
4145               call dcopy(nx(nb),A(index),1,tmp,1)
4146
4147               msglen  = nx(nb)
4148               dest    = MASTER
4149
4150               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4151
4152            end if
4153
4154         end do
4155         end do
4156      end if
4157
4158      end if
4159
4160      return
4161      end
4162
4163      subroutine D3dB_t_write(nb,iunit,A,tmp,jcol)
4164      implicit none
4165      integer nb
4166      integer iunit
4167      real*8  A(*)
4168      real*8  tmp(*)
4169      integer jcol
4170
4171#include "bafdecls.fh"
4172#include "errquit.fh"
4173#include "D3dB.fh"
4174#include "tcgmsg.fh"
4175#include "msgtypesf.h"
4176
4177      integer rcv_len,rcv_proc
4178
4179
4180*     *** local variables ***
4181      integer MASTER,taskid
4182      parameter(MASTER=0)
4183      logical value
4184      integer p_from, p_here,q
4185      integer j,k,index
4186      integer dest,source,status,msglen
4187      integer dum,dum_msglen
4188      integer tmp1(2),tmp2(2)
4189
4190      integer ii,taskid_j
4191
4192*     **** external functions ****
4193      integer  Parallel2d_convert_taskid_ij
4194      external Parallel2d_convert_taskid_ij
4195
4196      call Parallel_taskid(taskid)
4197      call Parallel2d_taskid_j(taskid_j)
4198
4199
4200      !**********************
4201      !**** slab mapping ****
4202      !**********************
4203      if (mapping.eq.1) then
4204*     **** master node reads from file and distributes ****
4205      if (taskid.eq.MASTER) then
4206         do k=1,nz(nb)
4207
4208            call D3dB_ktoqp(nb,k,q,ii)
4209
4210            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
4211            if (p_from.eq.MASTER) then
4212
4213               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
4214               call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1)
4215
4216            else
4217
4218               msglen  = (nx(nb)/2+1)*ny(nb)
4219               status  = msglen
4220               source  = p_from
4221
4222               dum = 99
4223               dum_msglen = 1
4224               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
4225               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4226     >                       source,rcv_proc,1)
4227
4228            end if
4229
4230            call dwrite(iunit,tmp,(nx(nb)/2+1)*ny(nb))
4231         end do
4232
4233*     **** not master node ****
4234      else
4235         do k=1,nz(nb)
4236
4237            call D3dB_ktoqp(nb,k,q,ii)
4238            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
4239            if (p_here.eq.taskid) then
4240
4241               index = (q-1)*(nx(nb)/2+1)*ny(nb) + 1
4242               call dcopy((nx(nb)/2+1)*ny(nb),A(index),1,tmp,1)
4243
4244
4245               msglen = (nx(nb)/2+1)*ny(nb)
4246               dest   = MASTER
4247
4248               dum_msglen = 1
4249               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
4250     >                       dest,rcv_proc,1)
4251               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4252
4253            end if
4254
4255         end do
4256      end if
4257
4258
4259      !*************************
4260      !**** hilbert mapping ****
4261      !*************************
4262      else
4263
4264      if (taskid_j.eq.jcol) then
4265
4266*       **** allocate temporary space  ****
4267        value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',
4268     >                      tmp1(2),tmp1(1))
4269        value = value.and.
4270     >          BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
4271        if (.not. value) call errquit('out of stack memory',0, MA_ERR)
4272*
4273        call D3dB_t_transpose_ijk(nb,6,A,
4274     >                            dcpl_mb(tmp1(1)),
4275     >                            dcpl_mb(tmp2(1)))  !*** map3to1 operation ***
4276
4277*       **** deallocate temporary space  ****
4278        value = BA_pop_stack(tmp2(2))
4279        value = value.and.BA_pop_stack(tmp1(2))
4280        if (.not. value) call errquit('error popping stack',0, MA_ERR)
4281
4282      end if
4283
4284
4285*     **** master node reads from file and distributes ****
4286      if (taskid.eq.MASTER) then
4287         do k=1,nz(nb)
4288         do j=1,ny(nb)
4289
4290            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4291            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4292            p_from = Parallel2d_convert_taskid_ij(ii,taskid_j)
4293            if (p_from.eq.MASTER) then
4294
4295               index = (q-1)*(nx(nb)/2+1) + 1
4296               call dcopy((nx(nb)/2+1),A(index),1,tmp,1)
4297
4298            else
4299
4300               msglen  = (nx(nb)/2+1)
4301               status  = msglen
4302               source  = p_from
4303
4304               dum = 99
4305               dum_msglen = 1
4306               call SND(9+MSGINT,dum,mitob(dum_msglen),source,1)
4307               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4308     >                       source,rcv_proc,1)
4309
4310            end if
4311
4312            call dwrite(iunit,tmp,(nx(nb)/2+1))
4313         end do
4314         end do
4315
4316*     **** not master node ****
4317      else
4318         do k=1,nz(nb)
4319         do j=1,ny(nb)
4320
4321            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4322            ii     = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4323            p_here = Parallel2d_convert_taskid_ij(ii,taskid_j)
4324            if (p_here.eq.taskid) then
4325
4326               index = (q-1)*(nx(nb)/2+1) + 1
4327               call dcopy((nx(nb)/2+1),A(index),1,tmp,1)
4328
4329
4330               msglen = (nx(nb)/2+1)
4331               dest   = MASTER
4332
4333               dum_msglen = 1
4334               call RCV(9+MSGINT,dum,mitob(dum_msglen),rcv_len,
4335     >                       dest,rcv_proc,1)
4336               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4337
4338            end if
4339
4340         end do
4341         end do
4342      end if
4343      end if
4344
4345
4346      return
4347      end
4348
4349
4350
4351*     ***********************************
4352*     *					*
4353*     *	       D3dB_r_FormatWrite	*
4354*     *					*
4355*     ***********************************
4356
4357      subroutine D3dB_r_FormatWrite(nb,iunit,A,tmp)
4358      implicit none
4359      integer nb
4360      integer iunit
4361      real*8     A(*)
4362      real*8     tmp(*)
4363
4364#include "bafdecls.fh"
4365#include "D3dB.fh"
4366
4367
4368#include "tcgmsg.fh"
4369#include "msgtypesf.h"
4370      integer rcv_len,rcv_proc
4371
4372
4373*     *** local variables ***
4374      integer MASTER,taskid
4375      parameter(MASTER=0)
4376      integer p_from, p_here,q
4377      integer i,j,k,index,index2
4378      integer dest,source,status,msglen
4379
4380      call Parallel_taskid(taskid)
4381
4382      !**********************
4383      !**** slab mapping ****
4384      !**********************
4385      if (mapping.eq.1) then
4386*     **** master node reads from file and distributes ****
4387      if (taskid.eq.MASTER) then
4388         do k=1,nz(nb)
4389
4390            call D3dB_ktoqp(nb,k,q,p_from)
4391
4392            if (p_from.eq.MASTER) then
4393
4394               do j=1,ny(nb)
4395                 index = (q-1)*(nx(nb)+2)*ny(nb)
4396     >                 + (j-1)*(nx(nb)+2) + 1
4397                 index2 = (j-1)*nx(nb) + 1
4398                 call dcopy(nx(nb),A(index),1,tmp(index2),1)
4399               end do
4400
4401            else
4402
4403               msglen  = (nx(nb))*ny(nb)
4404               status  = msglen
4405               source  = p_from
4406
4407               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4408     >                       source,rcv_proc,1)
4409
4410            end if
4411
4412c           **** call dwrite(iunit,tmp,(nx(nb))*ny(nb)) ****
4413            do j=1,ny(nb)
4414              write(iunit,'(3E26.14)') (tmp(i+(j-1)*nx(nb)), i=1,nx(nb))
4415            end do
4416
4417         end do
4418
4419*     **** not master node ****
4420      else
4421         do k=1,nz(nb)
4422
4423            call D3dB_ktoqp(nb,k,q,p_here)
4424            if (p_here.eq.taskid) then
4425
4426               do j=1,ny(nb)
4427                  index = (q-1)*(nx(nb)+2)*ny(nb)
4428     >                  + (j-1)*(nx(nb)+2) + 1
4429                  index2 = (j-1)*nx(nb) + 1
4430                  call dcopy(nx(nb),A(index),1,tmp(index2),1)
4431               end do
4432
4433
4434               msglen  = (nx(nb))*ny(nb)
4435               dest    = MASTER
4436
4437               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4438
4439            end if
4440
4441         end do
4442      end if
4443
4444
4445
4446      !*************************
4447      !**** hilbert mapping ****
4448      !*************************
4449      else
4450
4451
4452*     **** master node reads from file and distributes ****
4453      if (taskid.eq.MASTER) then
4454         do k=1,nz(nb)
4455         do j=1,ny(nb)
4456
4457            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4458            p_from = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4459
4460
4461            if (p_from.eq.MASTER) then
4462              index = (q-1)*(nx(nb)+2) + 1
4463              call dcopy(nx(nb),A(index),1,tmp,1)
4464
4465            else
4466
4467               msglen  = (nx(nb))
4468               status  = msglen
4469               source  = p_from
4470
4471               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4472     >                       source,rcv_proc,1)
4473
4474            end if
4475
4476            !call dwrite(iunit,tmp,(nx(nb)))
4477            write(iunit,'(3E26.14)') (tmp(i), i=1,nx(nb))
4478         end do
4479         end do
4480
4481*     **** not master node ****
4482      else
4483         do k=1,nz(nb)
4484         do j=1,ny(nb)
4485
4486            q      = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4487            p_here = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
4488
4489            if (p_here.eq.taskid) then
4490
4491               index = (q-1)*(nx(nb)+2) + 1
4492               call dcopy(nx(nb),A(index),1,tmp,1)
4493
4494               msglen  = nx(nb)
4495               dest    = MASTER
4496
4497               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4498
4499            end if
4500
4501         end do
4502         end do
4503      end if
4504
4505
4506
4507      end if
4508
4509*     **** wait ****
4510      return
4511      end
4512
4513
4514*     *******************************************
4515*     *						*
4516*     *	       D3dB_r_FormatWrite_reverse	*
4517*     *						*
4518*     *******************************************
4519
4520      subroutine D3dB_r_FormatWrite_reverse(nb,iunit,A,tmp)
4521      implicit none
4522      integer nb
4523      integer iunit
4524      real*8     A(*)
4525      real*8     tmp(*)
4526
4527#include "D3dB.fh"
4528
4529#include "tcgmsg.fh"
4530#include "msgtypesf.h"
4531      integer rcv_len,rcv_proc
4532
4533
4534*     *** local variables ***
4535      integer MASTER,taskid
4536      parameter(MASTER=0)
4537      integer p_from, p_here,q
4538      integer i,j,k,index
4539      integer dest,source,status,msglen,idum
4540
4541      call Parallel_taskid(taskid)
4542
4543      !**********************
4544      !**** slab mapping ****
4545      !**********************
4546      if (mapping.eq.1) then
4547*     **** master node reads from file and distributes ****
4548      if (taskid.eq.MASTER) then
4549         do i=1,nx(nb)
4550         do j=1,ny(nb)
4551
4552            do k=1,nz(nb)
4553              call D3dB_ktoqp(nb,k,q,p_from)
4554              if (p_from.eq.MASTER) then
4555                 index = (q-1)*(nx(nb)+2)*ny(nb)
4556     >                 + (j-1)*(nx(nb)+2) + i
4557                 tmp(k) = A(index)
4558              else
4559                 msglen  = 1
4560                 status  = msglen
4561                 source  = p_from
4562
4563                 call SND(9+MSGINT,idum,mitob(msglen),source,1)
4564                 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len,
4565     >                         source,rcv_proc,1)
4566
4567              end if
4568            end do
4569            write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb))
4570
4571         end do
4572         end do
4573
4574*     **** not master node ****
4575      else
4576         do i=1,nx(nb)
4577         do j=1,ny(nb)
4578
4579            do k=1,nz(nb)
4580              call D3dB_ktoqp(nb,k,q,p_here)
4581              if (p_here.eq.taskid) then
4582
4583                 index = (q-1)*(nx(nb)+2)*ny(nb)
4584     >                 + (j-1)*(nx(nb)+2) + i
4585                 tmp(1) = A(index)
4586
4587                 msglen  = 1
4588                 dest    = MASTER
4589
4590                 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len,
4591     >                         dest,rcv_proc,1)
4592                 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4593
4594              end if
4595            end do
4596
4597         end do
4598         end do
4599      end if
4600
4601
4602      !*************************
4603      !**** hilbert mapping ****
4604      !*************************
4605      else
4606
4607*     **** master node reads from file and distributes ****
4608      if (taskid.eq.MASTER) then
4609         do i=1,nx(nb)
4610         do j=1,ny(nb)
4611
4612            do k=1,nz(nb)
4613              call D3dB_ijktoindex2p(nb,i,j,k,index,p_from)
4614              if (p_from.eq.MASTER) then
4615                 tmp(k) = A(index)
4616              else
4617                 msglen  = 1
4618                 status  = msglen
4619                 source  = p_from
4620
4621                 call SND(9+MSGINT,idum,mitob(msglen),source,1)
4622                 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len,
4623     >                         source,rcv_proc,1)
4624
4625              end if
4626            end do
4627            write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb))
4628
4629         end do
4630         end do
4631
4632*     **** not master node ****
4633      else
4634         do i=1,nx(nb)
4635         do j=1,ny(nb)
4636
4637            do k=1,nz(nb)
4638              call D3dB_ijktoindex2p(nb,i,j,k,index,p_here)
4639              if (p_here.eq.taskid) then
4640
4641                 tmp(1) = A(index)
4642
4643                 msglen  = 1
4644                 dest    = MASTER
4645
4646                 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len,
4647     >                         dest,rcv_proc,1)
4648                 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4649
4650              end if
4651            end do
4652
4653         end do
4654         end do
4655      end if
4656
4657      end if
4658
4659*     **** wait ****
4660      return
4661      end
4662
4663*     ***********************************
4664*     *					*
4665*     *	       D3dB_r_FormatWrite_paw	*
4666*     *					*
4667*     ***********************************
4668
4669      subroutine D3dB_r_FormatWrite_paw(nb,iunit,A,tmp)
4670      implicit none
4671      integer nb
4672      integer iunit
4673      real*8     A(*)
4674      real*8     tmp(*)
4675
4676#include "D3dB.fh"
4677
4678
4679#include "tcgmsg.fh"
4680#include "msgtypesf.h"
4681      integer rcv_len,rcv_proc
4682
4683
4684*     *** local variables ***
4685      integer MASTER,taskid
4686      parameter(MASTER=0)
4687      integer p_from, p_here,q
4688      integer i,j,k,index
4689      integer dest,source,status,msglen
4690
4691      call Parallel_taskid(taskid)
4692
4693*     **** master node reads from file and distributes ****
4694      if (taskid.eq.MASTER) then
4695         do k=1,nz(nb)
4696         do j=1,ny(nb)
4697
4698            call D3dB_ktoqp(nb,j,q,p_from)
4699
4700            if (p_from.eq.MASTER) then
4701
4702                 index = (q-1)*(nx(nb)+2)*ny(nb)
4703     >                 + (k-1)*(nx(nb)+2) + 1
4704                 call dcopy(nx(nb),A(index),1,tmp,1)
4705
4706            else
4707
4708               msglen  = (nx(nb))
4709               status  = msglen
4710               source  = p_from
4711
4712               call RCV(9+MSGDBL,tmp,mdtob(msglen),rcv_len,
4713     >                       source,rcv_proc,1)
4714
4715            end if
4716
4717c           **** call dwrite(iunit,tmp,(nx(nb))) ****
4718            write(iunit,'(3E26.14)') (tmp(i), i=1,nx(nb))
4719
4720         end do
4721         end do
4722
4723*     **** not master node ****
4724      else
4725         do k=1,nz(nb)
4726         do j=1,ny(nb)
4727
4728            call D3dB_ktoqp(nb,j,q,p_here)
4729            if (p_here.eq.taskid) then
4730
4731                  index = (q-1)*(nx(nb)+2)*ny(nb)
4732     >                  + (k-1)*(nx(nb)+2) + 1
4733                  call dcopy(nx(nb),A(index),1,tmp,1)
4734
4735
4736               msglen  = (nx(nb))
4737               dest    = MASTER
4738
4739               call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
4740
4741            end if
4742
4743         end do
4744         end do
4745      end if
4746
4747*     **** wait ****
4748      return
4749      end
4750
4751
4752
4753