1*
2* $Id$
3*
4
5* Parallel.f
6* Author - Eric Bylaska
7*
8*   These routines are to be used to keep track of the parallel message
9* passing variables, as well as iniitialize and deinitialize the
10* message passing routines.
11*
12
13
14*     *************************************
15*     *                                   *
16*     *        Parallel_Init              *
17*     *                                   *
18*     *************************************
19
20      subroutine Parallel_Init()
21      implicit none
22
23#include "Parallel.fh"
24#include "bafdecls.fh"
25#include "errquit.fh"
26
27#include "tcgmsg.fh"
28#include "global.fh"
29
30c      include 'mpif.h'
31c      integer mpierr
32
33*     **** local variables ****
34      integer i
35
36      np     = nnodes()
37      taskid = nodeid()
38
39
40*     **** set up 3d processor grid = np x 1 x 1****
41      if (.not.BA_alloc_get(mt_int,np,'procNd',procNd(2),procNd(1)))
42     >  call errquit('Parallel_init:out of heap memory',0, MA_ERR)
43
44      np_i = np
45      np_j = 1
46      np_k = 1
47      do i=0,np-1
48        int_mb(procNd(1)+i) = i
49      end do
50      taskid_i = taskid
51      taskid_j = 0
52      taskid_k = 0
53      comm_i   = ga_pgroup_get_world()
54      comm_j   = -99
55      comm_k   = -99
56
57      return
58      end
59
60
61*     *************************************
62*     *                                   *
63*     *        Parallel2d_Init            *
64*     *                                   *
65*     *************************************
66
67*     Sset up the 2d processor grid = np_i x np_j,
68*     where np_i = ncolumns, and np_j = np/np_i
69*
70      subroutine Parallel2d_Init(ncolumns)
71      implicit none
72      integer ncolumns
73
74#include "Parallel.fh"
75#include "bafdecls.fh"
76#include "errquit.fh"
77#include "global.fh"
78
79*     *** local variables ***
80      integer i,j,icount
81      integer tmp(2)
82
83      np_i = np/ncolumns
84      np_j = ncolumns
85
86      if (np_j.gt.1) then
87
88      icount = 0
89      do j=0,np_j-1
90      do i=0,np_i-1
91        if (icount.eq.taskid) then
92           taskid_i = i
93           taskid_j = j
94        end if
95        int_mb(procNd(1) + i + j*np_i) = icount
96        icount = mod((icount+1),np)
97      end do
98      end do
99
100      if (.not.BA_push_get(mt_int,np,'tmppp2',tmp(2),tmp(1)))
101     >  call errquit('Parallel2d_init:out of stack memory',0, MA_ERR)
102        do i=0,np_i-1
103          int_mb(tmp(1)+i) = int_mb(procNd(1) + i + taskid_j*np_i)
104        end do
105        comm_i = ga_pgroup_create(int_mb(tmp(1)),np_i)
106        do j=0,np_j-1
107          int_mb(tmp(1)+j) = int_mb(procNd(1) + taskid_i + j*np_i)
108        end do
109        comm_j = ga_pgroup_create(int_mb(tmp(1)),np_j)
110      if (.not.BA_pop_stack(tmp(2)))
111     >  call errquit('Parallel2d_init:popping stack memory',0, MA_ERR)
112
113      end if
114
115      return
116      end
117
118
119*     *************************************
120*     *                                   *
121*     *        Parallel2d_Finalize        *
122*     *                                   *
123*     *************************************
124
125      subroutine Parallel2d_Finalize()
126      implicit none
127
128#include "Parallel.fh"
129#include "bafdecls.fh"
130#include "errquit.fh"
131#include "global.fh"
132
133       if (np_j.gt.1) then
134
135*      **** free comm_i and comm_j communicators ****
136       if (.not.ga_pgroup_destroy(comm_i))
137     >  call errquit('Parallel2d_Finalize: error destoying comm_i',0,0)
138       if (.not.ga_pgroup_destroy(comm_j))
139     >  call errquit('Parallel2d_Finalize: error destoying comm_j',0,1)
140
141      end if
142      return
143      end
144
145
146*     *************************************
147*     *                                   *
148*     *        Parallel3d_Init            *
149*     *                                   *
150*     *************************************
151
152*     Sset up the 3d processor grid = np_i x np_j x np_k,
153*     where np_i = np/(np_j*np_k), and np_j = ncolumns and np_k=nzones
154*
155      subroutine Parallel3d_Init(ncolumns,nzones)
156      implicit none
157      integer ncolumns,nzones
158
159#include "Parallel.fh"
160#include "bafdecls.fh"
161#include "errquit.fh"
162#include "global.fh"
163
164*     *** local variables ***
165      integer i,j,k,icount
166      integer tmp(2)
167
168      np_i = np/(ncolumns*nzones)
169      np_j = ncolumns
170      np_k = nzones
171
172      if ((np_j.gt.1).or.(np_k.gt.1)) then
173
174      icount = 0
175      do k=0,np_k-1
176      do j=0,np_j-1
177      do i=0,np_i-1
178        if (icount.eq.taskid) then
179           taskid_i = i
180           taskid_j = j
181           taskid_k = k
182        end if
183        int_mb(procNd(1) + i + j*np_i + k*np_i*np_j) = icount
184        icount = mod((icount+1),np)
185      end do
186      end do
187      end do
188
189      if (.not.BA_push_get(mt_int,np,'tmppp2',tmp(2),tmp(1)))
190     >  call errquit('Parallel3d_init:out of stack memory',0, MA_ERR)
191        do i=0,np_i-1
192          int_mb(tmp(1)+i) = int_mb(procNd(1)
193     >                             + i
194     >                             + taskid_j*np_i
195     >                             + taskid_k*np_i*np_j)
196        end do
197        comm_i = ga_pgroup_create(int_mb(tmp(1)),np_i)
198        do j=0,np_j-1
199          int_mb(tmp(1)+j) = int_mb(procNd(1)
200     >                              + taskid_i
201     >                              + j*np_i
202     >                              + taskid_k*np_i*np_j)
203        end do
204        comm_j = ga_pgroup_create(int_mb(tmp(1)),np_j)
205        do k=0,np_k-1
206          int_mb(tmp(1)+k) = int_mb(procNd(1)
207     >                              + taskid_i
208     >                              + taskid_j*np_i
209     >                              + k*np_i*np_j)
210        end do
211        comm_k = ga_pgroup_create(int_mb(tmp(1)),np_k)
212      if (.not.BA_pop_stack(tmp(2)))
213     >  call errquit('Parallel3d_init:popping stack memory',0, MA_ERR)
214
215      end if
216
217      return
218      end
219
220
221*     *************************************
222*     *                                   *
223*     *        Parallel3d_Finalize        *
224*     *                                   *
225*     *************************************
226
227      subroutine Parallel3d_Finalize()
228      implicit none
229
230#include "Parallel.fh"
231#include "bafdecls.fh"
232#include "errquit.fh"
233#include "global.fh"
234
235      if ((np_j.gt.1).or.(np_k.gt.1)) then
236
237*      **** free comm_i and comm_j communicators ****
238       if (.not.ga_pgroup_destroy(comm_i))
239     >  call errquit('Parallel3d_Finalize: error destoying comm_i',0,0)
240       if (.not.ga_pgroup_destroy(comm_j))
241     >  call errquit('Parallel3d_Finalize: error destoying comm_j',0,1)
242       if (.not.ga_pgroup_destroy(comm_k))
243     >  call errquit('Parallel3d_Finalize: error destoying comm_k',0,2)
244
245      end if
246      return
247      end
248
249
250
251*     ***********************************
252*     *                                 *
253*     *         Parallel_MaxAll         *
254*     *                                 *
255*     ***********************************
256
257      subroutine Parallel_MaxAll(sum)
258c     implicit none
259      real*8  sum
260
261#include "tcgmsg.fh"
262#include "msgtypesf.h"
263#include "Parallel.fh"
264
265      if (np.gt.1) then
266         call GA_DGOP(9+MSGDBL,sum,1,'max')
267      end if
268
269      return
270      end
271
272
273
274
275
276*     ***********************************
277*     *                                 *
278*     *         Parallel_IMaxAll        *
279*     *                                 *
280*     ***********************************
281      subroutine Parallel_IMaxAll(isum)
282c     implicit none
283      integer isum
284
285#include "tcgmsg.fh"
286#include "msgtypesf.h"
287#include "Parallel.fh"
288
289      if (np.gt.1) then
290         call GA_IGOP(9+MSGINT,isum,1,'max')
291      end if
292      return
293      end
294
295
296
297
298*     ***********************************
299*     *                                 *
300*     *         Parallel_SumAll         *
301*     *                                 *
302*     ***********************************
303
304      subroutine Parallel_SumAll(sum)
305c     implicit none
306      real*8  sum
307
308#include "tcgmsg.fh"
309#include "msgtypesf.h"
310#include "Parallel.fh"
311
312      if (np.gt.1) then
313         call GA_DGOP(9+MSGDBL,sum,1,'+')
314      end if
315
316      return
317      end
318
319
320
321*     ***********************************
322*     *                                 *
323*     *         Parallel_ISumAll        *
324*     *                                 *
325*     ***********************************
326
327      subroutine Parallel_ISumAll(sum)
328c     implicit none
329      integer  sum
330
331#include "tcgmsg.fh"
332#include "msgtypesf.h"
333#include "Parallel.fh"
334
335      if (np.gt.1) then
336         call GA_IGOP(9+MSGINT,sum,1,'+')
337      end if
338
339      return
340      end
341
342
343*     ***********************************
344*     *                                 *
345*     *      Parallel_Vector_SumAll     *
346*     *                                 *
347*     ***********************************
348      subroutine Parallel_Vector_SumAll(n,sum)
349c     implicit none
350      integer n
351      real*8  sum(*)
352
353#include "bafdecls.fh"
354#include "tcgmsg.fh"
355#include "msgtypesf.h"
356#include "errquit.fh"
357#include "Parallel.fh"
358
359
360      call nwpw_timing_start(2)
361      if (np.gt.1) then
362         call GA_DGOP(9+MSGDBL,sum,n,'+')
363      end if
364      call nwpw_timing_end(2)
365      return
366      end
367
368
369
370*     ***********************************
371*     *                                 *
372*     *     Parallel_Vector_ISumAll     *
373*     *                                 *
374*     ***********************************
375
376      subroutine Parallel_Vector_ISumAll(n,sum)
377c     implicit none
378      integer n
379      integer  sum(*)
380
381#include "bafdecls.fh"
382#include "errquit.fh"
383#include "tcgmsg.fh"
384#include "msgtypesf.h"
385#include "Parallel.fh"
386
387
388      call nwpw_timing_start(2)
389      if (np.gt.1) then
390         call GA_IGOP(9+MSGINT,sum,n,'+')
391      end if
392      call nwpw_timing_end(2)
393
394      return
395      end
396
397
398
399*     ***********************************
400*     *                                 *
401*     *      Parallel_Brdcst_value      *
402*     *                                 *
403*     ***********************************
404
405      subroutine Parallel_Brdcst_value(psend,sum)
406      implicit none
407      integer psend
408      real*8  sum
409
410#include "bafdecls.fh"
411#include "errquit.fh"
412#include "tcgmsg.fh"
413#include "msgtypesf.h"
414#include "Parallel.fh"
415
416*     **** local variables ****
417      integer msglen
418
419      if (np.gt.1) then
420         msglen = 1
421         call BRDCST(9+MSGDBL,sum,mdtob(msglen),psend)
422      end if
423
424      return
425      end
426
427
428*     ***********************************
429*     *                                 *
430*     *      Parallel_Brdcst_values     *
431*     *                                 *
432*     ***********************************
433
434      subroutine Parallel_Brdcst_values(psend,nsize,sum)
435      implicit none
436      integer psend,nsize
437      real*8  sum(*)
438
439#include "bafdecls.fh"
440#include "errquit.fh"
441#include "tcgmsg.fh"
442#include "msgtypesf.h"
443#include "Parallel.fh"
444
445
446      if (np.gt.1) then
447         call BRDCST(9+MSGDBL,sum,mdtob(nsize),psend)
448      end if
449
450      return
451      end
452
453*     ***********************************
454*     *                                 *
455*     *      Parallel_Brdcst_ivalue     *
456*     *                                 *
457*     ***********************************
458
459      subroutine Parallel_Brdcst_ivalue(psend,isum)
460      implicit none
461      integer psend
462      integer isum
463
464#include "bafdecls.fh"
465#include "errquit.fh"
466#include "tcgmsg.fh"
467#include "msgtypesf.h"
468#include "Parallel.fh"
469
470*     **** local variables ****
471      integer msglen
472
473      if (np.gt.1) then
474         msglen = 1
475         call BRDCST(9+MSGINT,isum,mitob(msglen),psend)
476      end if
477
478      return
479      end
480
481
482
483*     ***********************************
484*     *                                 *
485*     *      Parallel_Brdcst_ivalues     *
486*     *                                 *
487*     ***********************************
488
489      subroutine Parallel_Brdcst_ivalues(psend,nsize,isum)
490      implicit none
491      integer psend,nsize
492      integer  isum(*)
493
494#include "bafdecls.fh"
495#include "errquit.fh"
496#include "tcgmsg.fh"
497#include "msgtypesf.h"
498#include "Parallel.fh"
499
500      if (np.gt.1) then
501         call BRDCST(9+MSGINT,isum,mitob(nsize),psend)
502      end if
503      return
504      end
505
506
507
508
509*     ***********************************
510*     *                                 *
511*     *         Parallela_MaxAll        *
512*     *                                 *
513*     ***********************************
514
515      subroutine Parallela_MaxAll(ic,sum)
516c     implicit none
517      integer ic
518      real*8  sum
519
520#include "tcgmsg.fh"
521#include "msgtypesf.h"
522#include "Parallel.fh"
523
524      if (np.gt.1) then
525         if (ic.eq.1) then
526            call GA_PGROUP_DGOP(comm_i,9+MSGDBL,sum,1,'max')
527         else if (ic.eq.2) then
528            call GA_PGROUP_DGOP(comm_j,9+MSGDBL,sum,1,'max')
529         else if (ic.eq.3) then
530            call GA_PGROUP_DGOP(comm_k,9+MSGDBL,sum,1,'max')
531         else
532            call GA_DGOP(9+MSGDBL,sum,1,'max')
533         end if
534      end if
535
536      return
537      end
538
539
540
541
542*     ***********************************
543*     *                                 *
544*     *         Parallela_SumAll        *
545*     *                                 *
546*     ***********************************
547
548      subroutine Parallela_SumAll(ic,sum)
549c     implicit none
550      integer ic
551      real*8  sum
552
553#include "tcgmsg.fh"
554#include "msgtypesf.h"
555#include "Parallel.fh"
556
557      if (np.gt.1) then
558         if (ic.eq.1) then
559            call GA_PGROUP_DGOP(comm_i,9+MSGDBL,sum,1,'+')
560         else if (ic.eq.1) then
561            call GA_PGROUP_DGOP(comm_j,9+MSGDBL,sum,1,'+')
562         else if (ic.eq.3) then
563            call GA_PGROUP_DGOP(comm_k,9+MSGDBL,sum,1,'+')
564         else
565            call GA_DGOP(9+MSGDBL,sum,1,'+')
566         end if
567      end if
568
569      return
570      end
571
572
573
574*     ***********************************
575*     *                                 *
576*     *         Parallela_ISumAll       *
577*     *                                 *
578*     ***********************************
579
580      subroutine Parallela_ISumAll(ic,sum)
581c     implicit none
582      integer ic
583      integer  sum
584
585#include "tcgmsg.fh"
586#include "msgtypesf.h"
587#include "Parallel.fh"
588
589      if (np.gt.1) then
590         if (ic.eq.1) then
591            call GA_PGROUP_IGOP(comm_i,9+MSGINT,sum,1,'+')
592         else if (ic.eq.2) then
593            call GA_PGROUP_IGOP(comm_j,9+MSGINT,sum,1,'+')
594         else if (ic.eq.3) then
595            call GA_PGROUP_IGOP(comm_k,9+MSGINT,sum,1,'+')
596         else
597            call GA_IGOP(9+MSGINT,sum,1,'+')
598         end if
599      end if
600
601      return
602      end
603
604
605*     ***********************************
606*     *                                 *
607*     *      Parallela_Vector_SumAll    *
608*     *                                 *
609*     ***********************************
610      subroutine Parallela_Vector_SumAll(ic,n,sum)
611c     implicit none
612      integer ic
613      integer n
614      real*8  sum(*)
615
616#include "bafdecls.fh"
617#include "tcgmsg.fh"
618#include "msgtypesf.h"
619#include "errquit.fh"
620#include "Parallel.fh"
621
622
623      call nwpw_timing_start(2)
624      if (np.gt.1) then
625         if (ic.eq.1) then
626            call GA_PGROUP_DGOP(comm_i,9+MSGDBL,sum,n,'+')
627         else if (ic.eq.2) then
628            call GA_PGROUP_DGOP(comm_j,9+MSGDBL,sum,n,'+')
629         else if (ic.eq.3) then
630            call GA_PGROUP_DGOP(comm_k,9+MSGDBL,sum,n,'+')
631         else
632            call GA_DGOP(9+MSGDBL,sum,n,'+')
633         end if
634      end if
635      call nwpw_timing_end(2)
636      return
637      end
638
639
640
641*     ***********************************
642*     *                                 *
643*     *     Parallela_Vector_ISumAll    *
644*     *                                 *
645*     ***********************************
646
647      subroutine Parallela_Vector_ISumAll(ic,n,sum)
648c     implicit none
649      integer ic
650      integer n
651      integer  sum(*)
652
653#include "bafdecls.fh"
654#include "errquit.fh"
655#include "tcgmsg.fh"
656#include "msgtypesf.h"
657#include "Parallel.fh"
658
659
660      call nwpw_timing_start(2)
661      if (np.gt.1) then
662         if (ic.eq.1) then
663            call GA_PGROUP_IGOP(comm_i,9+MSGINT,sum,n,'+')
664         else if (ic.eq.2) then
665            call GA_PGROUP_IGOP(comm_j,9+MSGINT,sum,n,'+')
666         else if (ic.eq.3) then
667            call GA_PGROUP_IGOP(comm_k,9+MSGINT,sum,n,'+')
668         else
669            call GA_IGOP(9+MSGINT,sum,n,'+')
670         end if
671      end if
672      call nwpw_timing_end(2)
673
674      return
675      end
676
677
678
679*     ***********************************
680*     *                                 *
681*     *      Parallela_Brdcst_value      *
682*     *                                 *
683*     ***********************************
684
685      subroutine Parallela_Brdcst_value(ic,psend,sum)
686      implicit none
687      integer ic
688      integer psend
689      real*8  sum
690
691#include "bafdecls.fh"
692#include "errquit.fh"
693#include "tcgmsg.fh"
694#include "msgtypesf.h"
695#include "Parallel.fh"
696
697*     **** local variables ****
698      integer msglen
699
700      if (np.gt.1) then
701         msglen = 1
702         if (ic.eq.1) then
703          call GA_PGROUP_BRDCST(comm_i,9+MSGDBL,sum,mdtob(msglen),psend)
704         else if (ic.eq.2) then
705          call GA_PGROUP_BRDCST(comm_j,9+MSGDBL,sum,mdtob(msglen),psend)
706         else if (ic.eq.3) then
707          call GA_PGROUP_BRDCST(comm_k,9+MSGDBL,sum,mdtob(msglen),psend)
708         else
709            call GA_BRDCST(9+MSGDBL,sum,mdtob(msglen),psend)
710         end if
711      end if
712
713      return
714      end
715
716
717*     ***********************************
718*     *                                 *
719*     *      Parallela_Brdcst_values    *
720*     *                                 *
721*     ***********************************
722
723      subroutine Parallela_Brdcst_values(ic,psend,nsize,sum)
724      implicit none
725      integer ic
726      integer psend,nsize
727      real*8  sum(*)
728
729#include "bafdecls.fh"
730#include "errquit.fh"
731#include "tcgmsg.fh"
732#include "msgtypesf.h"
733#include "Parallel.fh"
734
735
736      if (np.gt.1) then
737         if (ic.eq.1) then
738           call GA_PGROUP_BRDCST(comm_i,9+MSGDBL,sum,mdtob(nsize),psend)
739         else if (ic.eq.2) then
740           call GA_PGROUP_BRDCST(comm_j,9+MSGDBL,sum,mdtob(nsize),psend)
741         else if (ic.eq.3) then
742           call GA_PGROUP_BRDCST(comm_k,9+MSGDBL,sum,mdtob(nsize),psend)
743         else
744            call GA_BRDCST(9+MSGDBL,sum,mdtob(nsize),psend)
745         end if
746      end if
747
748      return
749      end
750
751
752
753*     ***********************************
754*     *                                 *
755*     *      Parallela_Brdcst_ivalue    *
756*     *                                 *
757*     ***********************************
758      subroutine Parallela_Brdcst_ivalue(ic,psend,isum)
759      implicit none
760      integer ic
761      integer psend
762      integer isum
763
764#include "bafdecls.fh"
765#include "errquit.fh"
766#include "tcgmsg.fh"
767#include "msgtypesf.h"
768#include "Parallel.fh"
769
770*     **** local variables ****
771      integer msglen
772
773      if (np.gt.1) then
774         msglen = 1
775         if (ic.eq.1) then
776         call GA_PGROUP_BRDCST(comm_i,9+MSGINT,isum,mitob(msglen),psend)
777         else if (ic.eq.2) then
778         call GA_PGROUP_BRDCST(comm_j,9+MSGINT,isum,mitob(msglen),psend)
779         else if (ic.eq.3) then
780         call GA_PGROUP_BRDCST(comm_k,9+MSGINT,isum,mitob(msglen),psend)
781         else
782         call GA_BRDCST(9+MSGINT,isum,mitob(msglen),psend)
783         end if
784      end if
785
786      return
787      end
788
789
790
791
792*     ***********************************
793*     *                                 *
794*     *      Parallela_Brdcst_ivalues   *
795*     *                                 *
796*     ***********************************
797
798      subroutine Parallela_Brdcst_ivalues(ic,psend,nsize,isum)
799      implicit none
800      integer ic
801      integer psend,nsize
802      integer  isum(*)
803
804#include "bafdecls.fh"
805#include "errquit.fh"
806#include "tcgmsg.fh"
807#include "msgtypesf.h"
808#include "Parallel.fh"
809
810      if (np.gt.1) then
811         if (ic.eq.1) then
812          call GA_PGROUP_BRDCST(comm_i,9+MSGINT,isum,mitob(nsize),psend)
813         else if (ic.eq.2) then
814          call GA_PGROUP_BRDCST(comm_j,9+MSGINT,isum,mitob(nsize),psend)
815         else if (ic.eq.3) then
816          call GA_PGROUP_BRDCST(comm_k,9+MSGINT,isum,mitob(nsize),psend)
817         else
818            call GA_BRDCST(9+MSGINT,isum,mitob(nsize),psend)
819         end if
820      end if
821      return
822      end
823
824
825
826*     ***********************************
827*     *                                 *
828*     *      Parallela_start_rotate     *
829*     *                                 *
830*     ***********************************
831
832      subroutine Parallela_start_rotate(ic,shift,
833     >                                  A1,nsize1,
834     >                                  A2,nsize2,request)
835      implicit none
836      integer ic,shift
837      real*8  A1(*)
838      integer nsize1
839      real*8  A2(*)
840      integer nsize2
841      integer request(*)
842
843#include "bafdecls.fh"
844#include "tcgmsg.fh"
845#include "msgtypesf.h"
846#include "errquit.fh"
847
848#include "Parallel.fh"
849
850
851*     **** local variables ****
852      integer i,mynp,mytaskid,msglen,msgype,mpierr,proc_to,proc_from
853      integer rcv_proc,rcv_len,psr,tmp(2),pto
854
855*     ***** external functions ****
856      integer  Parallel3d_convert_taskid_i
857      integer  Parallel3d_convert_taskid_j
858      integer  Parallel3d_convert_taskid_k
859      external Parallel3d_convert_taskid_i
860      external Parallel3d_convert_taskid_j
861      external Parallel3d_convert_taskid_k
862
863      if (ic.eq.1) then
864         mynp     = np_i
865         mytaskid = taskid_i
866         proc_to   = mod(mytaskid+shift+mynp,mynp)
867         proc_from = mod(mytaskid-shift+mynp,mynp)
868
869         proc_to   = Parallel3d_convert_taskid_i(proc_to)
870         proc_from = Parallel3d_convert_taskid_i(proc_from)
871      else if (ic.eq.2) then
872         mynp     = np_j
873         mytaskid = taskid_j
874         proc_to   = mod(mytaskid+shift+mynp,mynp)
875         proc_from = mod(mytaskid-shift+mynp,mynp)
876
877         proc_to   = Parallel3d_convert_taskid_j(proc_to)
878         proc_from = Parallel3d_convert_taskid_j(proc_from)
879      else if (ic.eq.3) then
880         mynp     = np_k
881         mytaskid = taskid_k
882         proc_to   = mod(mytaskid+shift+mynp,mynp)
883         proc_from = mod(mytaskid-shift+mynp,mynp)
884
885         proc_to   = Parallel3d_convert_taskid_k(proc_to)
886         proc_from = Parallel3d_convert_taskid_k(proc_from)
887      else
888         mynp     = np
889         mytaskid = taskid
890         proc_to   = mod(mytaskid+shift+mynp,mynp)
891         proc_from = mod(mytaskid-shift+mynp,mynp)
892      end if
893
894*      /* determine psr - should be made w/o using tmp array! */
895      if (.not.BA_push_get(mt_int,mynp,'tmppp2',tmp(2),tmp(1)))
896     >  call errquit('Parallela_start_rotate:out of stack',0,MA_ERR)
897      do i=0,np-1
898         int_mb(tmp(1)+i) = 0
899      end do
900      do i=0,np-1
901         pto = mod(i+shift+mynp,mynp)
902         if ((int_mb(tmp(1)+i).eq.0).and.(int_mb(tmp(1)+pto).eq.0)) then
903            int_mb(tmp(1)+i)   = 1
904            int_mb(tmp(1)+pto) = 2
905         end if
906      end do
907      psr = int_mb(tmp(1)+mytaskid)
908      if (psr.eq.0) psr = 2
909      if (.not.BA_pop_stack(tmp(2)))
910     >  call errquit('Parallela_start_rotate:popping stack',0,MA_ERR)
911
912*     **** send then receive ****
913      if (psr.eq.1) then
914
915         if (nsize1.gt.0) then
916            msglen  = nsize1
917            call SND(9+MSGDBL,A1,mdtob(msglen),proc_to,1)
918            request(4) = 1
919         else
920            request(4) = 0
921         end if
922
923         if (nsize2.gt.0) then
924            msglen  = nsize2
925            call RCV(9+MSGDBL,A2,mdtob(msglen),rcv_len,
926     >            proc_from,rcv_proc,1)
927            request(3) = 1
928         else
929            request(3) = 0
930         end if
931
932*     **** receive then receive ****
933      else
934         if (nsize2.gt.0) then
935            msglen  = nsize2
936            call RCV(9+MSGDBL,A2,mdtob(msglen),rcv_len,
937     >            proc_from,rcv_proc,1)
938            request(3) = 1
939         else
940            request(3) = 0
941         end if
942
943         if (nsize1.gt.0) then
944            msglen  = nsize1
945            call SND(9+MSGDBL,A1,mdtob(msglen),proc_to,1)
946            request(4) = 1
947         else
948            request(4) = 0
949         end if
950      end if
951
952      if ((request(3).eq.1).and.(request(4).eq.1)) then
953         request(3) = 1
954      else if (request(3).eq.1) then
955         request(3) = 2
956      else if (request(4).eq.1) then
957         request(3) = 3
958      else
959         request(3) = 4
960      end if
961
962      return
963      end
964
965
966*     ***********************************
967*     *                                 *
968*     *      Parallela_start_Irotate    *
969*     *                                 *
970*     ***********************************
971      subroutine Parallela_start_Irotate(ic,shift,
972     >                                  A1,nsize1,
973     >                                  A2,nsize2,request)
974      implicit none
975      integer ic,shift
976      integer A1(*)
977      integer nsize1
978      integer A2(*)
979      integer nsize2
980      integer request(*)
981
982#include "bafdecls.fh"
983#include "tcgmsg.fh"
984#include "msgtypesf.h"
985#include "errquit.fh"
986
987#include "Parallel.fh"
988
989
990*     **** local variables ****
991      integer i,mynp,mytaskid,msglen,msgype,mpierr,proc_to,proc_from
992      integer rcv_proc,rcv_len,psr,tmp(2),pto
993
994*     ***** external functions ****
995      integer  Parallel3d_convert_taskid_i
996      integer  Parallel3d_convert_taskid_j
997      integer  Parallel3d_convert_taskid_k
998      external Parallel3d_convert_taskid_i
999      external Parallel3d_convert_taskid_j
1000      external Parallel3d_convert_taskid_k
1001
1002      if (ic.eq.1) then
1003         mynp     = np_i
1004         mytaskid = taskid_i
1005         proc_to   = mod(mytaskid+shift+mynp,mynp)
1006         proc_from = mod(mytaskid-shift+mynp,mynp)
1007
1008         proc_to   = Parallel3d_convert_taskid_i(proc_to)
1009         proc_from = Parallel3d_convert_taskid_i(proc_from)
1010      else if (ic.eq.2) then
1011         mynp     = np_j
1012         mytaskid = taskid_j
1013         proc_to   = mod(mytaskid+shift+mynp,mynp)
1014         proc_from = mod(mytaskid-shift+mynp,mynp)
1015
1016         proc_to   = Parallel3d_convert_taskid_j(proc_to)
1017         proc_from = Parallel3d_convert_taskid_j(proc_from)
1018      else if (ic.eq.3) then
1019         mynp     = np_k
1020         mytaskid = taskid_k
1021         proc_to   = mod(mytaskid+shift+mynp,mynp)
1022         proc_from = mod(mytaskid-shift+mynp,mynp)
1023
1024         proc_to   = Parallel3d_convert_taskid_k(proc_to)
1025         proc_from = Parallel3d_convert_taskid_k(proc_from)
1026      else
1027         mynp     = np
1028         mytaskid = taskid
1029         proc_to   = mod(mytaskid+shift+mynp,mynp)
1030         proc_from = mod(mytaskid-shift+mynp,mynp)
1031      end if
1032
1033*      /* determine psr - should be made w/o using tmp array! */
1034      if (.not.BA_push_get(mt_int,mynp,'tmppp2',tmp(2),tmp(1)))
1035     >  call errquit('Parallela_start_rotate:out of stack',0,MA_ERR)
1036      do i=0,np-1
1037         int_mb(tmp(1)+i) = 0
1038      end do
1039      do i=0,np-1
1040         pto = mod(i+shift+mynp,mynp)
1041         if ((int_mb(tmp(1)+i).eq.0).and.(int_mb(tmp(1)+pto).eq.0)) then
1042            int_mb(tmp(1)+i)   = 1
1043            int_mb(tmp(1)+pto) = 2
1044         end if
1045      end do
1046      psr = int_mb(tmp(1)+mytaskid)
1047      if (psr.eq.0) psr = 2
1048      if (.not.BA_pop_stack(tmp(2)))
1049     >  call errquit('Parallela_start_rotate:popping stack',0,MA_ERR)
1050
1051*     **** send then receive ****
1052      if (psr.eq.1) then
1053
1054         if (nsize1.gt.0) then
1055            msglen  = nsize1
1056            call SND(9+MSGINT,A1,mitob(msglen),proc_to,1)
1057            request(4) = 1
1058         else
1059            request(4) = 0
1060         end if
1061
1062         if (nsize2.gt.0) then
1063            msglen  = nsize2
1064            call RCV(9+MSGINT,A2,mitob(msglen),rcv_len,
1065     >            proc_from,rcv_proc,1)
1066            request(3) = 1
1067         else
1068            request(3) = 0
1069         end if
1070
1071*     **** receive then receive ****
1072      else
1073         if (nsize2.gt.0) then
1074            msglen  = nsize2
1075            call RCV(9+MSGINT,A2,mitob(msglen),rcv_len,
1076     >            proc_from,rcv_proc,1)
1077            request(3) = 1
1078         else
1079            request(3) = 0
1080         end if
1081
1082         if (nsize1.gt.0) then
1083            msglen  = nsize1
1084            call SND(9+MSGINT,A1,mitob(msglen),proc_to,1)
1085            request(4) = 1
1086         else
1087            request(4) = 0
1088         end if
1089      end if
1090
1091      if ((request(3).eq.1).and.(request(4).eq.1)) then
1092         request(3) = 1
1093      else if (request(3).eq.1) then
1094         request(3) = 2
1095      else if (request(4).eq.1) then
1096         request(3) = 3
1097      else
1098         request(3) = 4
1099      end if
1100
1101      return
1102      end
1103
1104
1105
1106
1107
1108
1109*     ***********************************
1110*     *                                 *
1111*     *      Parallela_end_rotate       *
1112*     *                                 *
1113*     ***********************************
1114
1115      subroutine Parallela_end_rotate(request)
1116      implicit none
1117      integer request(*)
1118
1119*     **** wait for completion of mp_send, also do a sync ****
1120      !*** do nothing ***
1121
1122      return
1123      end
1124
1125
1126
1127*     ***********************************
1128*     *                                 *
1129*     *      Parallel_send_characters   *
1130*     *                                 *
1131*     ***********************************
1132      subroutine Parallel_send_characters(pto,msgtype,nsize,cval)
1133      implicit none
1134      integer pto,msgtype,nsize
1135      character cval(*)
1136
1137#include "tcgmsg.fh"
1138#include "msgtypesf.h"
1139
1140      integer  rcv_len,rcv_proc
1141
1142      call SND(9+MSGCHR,cval,nsize,pto,1)
1143      return
1144      end
1145
1146*     ***********************************
1147*     *                                 *
1148*     *      Parallel_send_values       *
1149*     *                                 *
1150*     ***********************************
1151      subroutine Parallel_send_values(pto,msgtype,nsize,rval)
1152      implicit none
1153      integer pto,msgtype,nsize
1154      character rval(*)
1155
1156#include "tcgmsg.fh"
1157#include "msgtypesf.h"
1158
1159      call SND(9+MSGDBL,rval,mdtob(nsize),pto,1)
1160      return
1161      end
1162
1163*     ***********************************
1164*     *                                 *
1165*     *      Parallel_send_ivalues      *
1166*     *                                 *
1167*     ***********************************
1168      subroutine Parallel_send_ivalues(pto,msgtype,nsize,ival)
1169      implicit none
1170      integer pto,msgtype,nsize
1171      integer   ival(*)
1172
1173#include "tcgmsg.fh"
1174#include "msgtypesf.h"
1175
1176      call SND(9+MSGINT,ival,mitob(nsize),pto,1)
1177      return
1178      end
1179
1180
1181
1182
1183
1184*     ***********************************
1185*     *                                 *
1186*     *      Parallel_recv_characters   *
1187*     *                                 *
1188*     ***********************************
1189      subroutine Parallel_recv_characters(pfrom,msgtype,nsize,cval)
1190      implicit none
1191      integer  pfrom,msgtype,nsize
1192      character cval(*)
1193
1194#include "tcgmsg.fh"
1195#include "msgtypesf.h"
1196
1197      integer  rcv_len,rcv_proc
1198
1199      call RCV(9+MSGCHR,cval,nsize,rcv_len,pfrom,rcv_proc,1)
1200      return
1201      end
1202
1203
1204*     ***********************************
1205*     *                                 *
1206*     *      Parallel_recv_values       *
1207*     *                                 *
1208*     ***********************************
1209      subroutine Parallel_recv_values(pfrom,msgtype,nsize,rval)
1210      implicit none
1211      integer  pfrom,msgtype,nsize
1212      real*8   rval(*)
1213
1214#include "tcgmsg.fh"
1215#include "msgtypesf.h"
1216
1217      integer  rcv_len,rcv_proc
1218
1219      call RCV(9+MSGDBL,rval,mdtob(nsize),rcv_len,pfrom,rcv_proc,1)
1220      return
1221      end
1222
1223
1224*     ***********************************
1225*     *                                 *
1226*     *      Parallel_recv_ivalues      *
1227*     *                                 *
1228*     ***********************************
1229      subroutine Parallel_recv_ivalues(pfrom,msgtype,nsize,ival)
1230      implicit none
1231      integer  pfrom,msgtype,nsize
1232      integer  ival(*)
1233
1234#include "tcgmsg.fh"
1235#include "msgtypesf.h"
1236
1237      integer  rcv_len,rcv_proc
1238
1239      call RCV(9+MSGINT,ival,mitob(nsize),rcv_len,pfrom,rcv_proc,1)
1240      return
1241      end
1242
1243