1*
2* $Id$
3*
4
5*     ***********************************
6*     *               			*
7*     *           Balance_Init 	        *
8*     *                 		*
9*     ***********************************
10
11      subroutine Balance_Init(maxsize0,nidb,nidb_out)
12      implicit none
13      integer maxsize0
14      integer nidb(0:maxsize0-1)
15      integer nidb_out(0:maxsize0-1)
16
17#include "bafdecls.fh"
18#include "balance_common.fh"
19#include "errquit.fh"
20
21*     **** local variables ****
22      logical value
23      integer nb,np,taskid
24      integer nwave,nwave_out
25      integer dum(2)
26
27
28      maxsize = maxsize0
29      call Parallel2d_np_i(np)
30      call Parallel2d_taskid_i(taskid)
31
32*     **** allocate balance memory ****
33      value =  BA_alloc_get(mt_int,2*maxsize,
34     >                      'psizea_list',
35     >                      packet_size_list(2),
36     >                      packet_size_list(1))
37      value = value.and.
38     >       BA_alloc_get(mt_int,2*maxsize,
39     >                    'indxsa_list',
40     >                    indx_start_list(2),
41     >                    indx_start_list(1))
42      value = value.and.
43     >       BA_alloc_get(mt_int,2*maxsize,
44     >                    'prctoa_list',
45     >                    proc_to_list(2),
46     >                    proc_to_list(1))
47      value = value.and.
48     >       BA_alloc_get(mt_int,2*maxsize,
49     >                   'prcfra_list',
50     >                    proc_from_list(2),
51     >                    proc_from_list(1))
52
53      value = value.and.
54     >       BA_alloc_get(mt_int,maxsize,
55     >                   'npacket_list',
56     >                    npacket_list(2),
57     >                    npacket_list(1))
58      value = value.and.
59     >       BA_alloc_get(mt_log,maxsize,
60     >                   'receiver_list',
61     >                    receiver_list(2),
62     >                    receiver_list(1))
63      value = value.and.
64     >       BA_alloc_get(mt_log,maxsize,
65     >                   'sender_list',
66     >                    sender_list(2),
67     >                    sender_list(1))
68
69      do nb=0,maxsize-1
70
71*        **** allocate balance memory ****
72         value =  value.and.
73     >          BA_alloc_get(mt_int,np,
74     >                   'psizea',dum(2),dum(1))
75         int_mb(packet_size_list(1)+2*nb  ) = dum(1)
76         int_mb(packet_size_list(1)+2*nb+1) = dum(2)
77
78         value = value.and.
79     >       BA_alloc_get(mt_int,np,
80     >                   'indxsa',dum(2),dum(1))
81         int_mb(indx_start_list(1)+2*nb  ) = dum(1)
82         int_mb(indx_start_list(1)+2*nb+1) = dum(2)
83
84         value = value.and.
85     >       BA_alloc_get(mt_int,np,
86     >                   'prctoa',dum(2),dum(1))
87         int_mb(proc_to_list(1)+2*nb  ) = dum(1)
88         int_mb(proc_to_list(1)+2*nb+1) = dum(2)
89
90         value = value.and.
91     >       BA_alloc_get(mt_int,np,
92     >                   'prcfra',dum(2),dum(1))
93         int_mb(proc_from_list(1)+2*nb  ) = dum(1)
94         int_mb(proc_from_list(1)+2*nb+1) = dum(2)
95
96      end do
97
98      if (.not. value)
99     >   call errquit('Balance_init: out of heap memory',0, MA_ERR)
100
101
102
103      do nb=0,maxsize-1
104         nwave = nidb(nb)
105         call Balance_Init_a(nwave,np,taskid,nwave_out,
106     >      int_mb(npacket_list(1) +nb),
107     >      log_mb(receiver_list(1)+nb),
108     >      log_mb(sender_list(1)  +nb),
109     >      int_mb(int_mb(proc_to_list(1)    +2*nb)),
110     >      int_mb(int_mb(proc_from_list(1)  +2*nb)),
111     >      int_mb(int_mb(packet_size_list(1)+2*nb)),
112     >      int_mb(int_mb(indx_start_list(1) +2*nb)))
113
114         nidb_out(nb) = nidb(nb) + (nwave_out-nwave)
115      end do
116
117      return
118      end
119
120
121*     ***********************************
122*     *              			*
123*     *           Balance_End 		*
124*     *                 		*
125*     ***********************************
126
127      subroutine Balance_End()
128      implicit none
129
130#include "bafdecls.fh"
131#include "balance_common.fh"
132#include "errquit.fh"
133
134
135*     **** local variables ****
136      logical value
137      integer nb,dum2
138
139      value = .true.
140      do nb=0,maxsize-1
141
142         dum2 = int_mb(packet_size_list(1)+2*nb+1)
143         value = value.and.BA_free_heap(dum2)
144
145         dum2 = int_mb(indx_start_list(1)+2*nb+1)
146         value = value.and.BA_free_heap(dum2)
147
148         dum2 = int_mb(proc_to_list(1)+2*nb+1)
149         value = value.and.BA_free_heap(dum2)
150
151         dum2 = int_mb(proc_from_list(1)+2*nb+1)
152         value = value.and.BA_free_heap(dum2)
153
154      end do
155
156      value = value.and.BA_free_heap(packet_size_list(2))
157      value = value.and.BA_free_heap(indx_start_list(2))
158      value = value.and.BA_free_heap(proc_to_list(2))
159      value = value.and.BA_free_heap(proc_from_list(2))
160
161      value = value.and.BA_free_heap(npacket_list(2))
162      value = value.and.BA_free_heap(receiver_list(2))
163      value = value.and.BA_free_heap(sender_list(2))
164      if (.not. value)
165     >  call errquit('Balance_end: error freeing heap memory',0, MA_ERR)
166
167      return
168      end
169
170*     ***********************************
171*     *                 		*
172*     *           Balance_Init_a	*
173*     *                 		*
174*     ***********************************
175*    This routine defines the balance data structure
176
177      subroutine Balance_Init_a(nwave,np,taskid,
178     >                          nwave_out,
179     >                          npacket,receiver,sender,
180     >                          proc_to,proc_from,
181     >                          packet_size,indx_start)
182      implicit none
183      integer nwave,np,taskid
184      integer nwave_out
185
186      integer npacket
187      logical receiver,sender
188      integer proc_to(*),proc_from(*)
189      integer packet_size(*)
190      integer indx_start(*)
191
192#include "bafdecls.fh"
193#include "errquit.fh"
194
195*     ***** local variables ****
196      logical done,value
197      integer i,j
198      integer ave,short,long
199      integer above,below
200
201c      integer nwave2(0:(np-1))
202c      integer indx(0:(np-1))
203      integer nwave2(2),indx(2)
204
205*     **** allocate nwave2 and indx off the stack ****
206      value = BA_push_get(mt_int,(np),
207     >                     'nwave2',nwave2(2),nwave2(1))
208      value = value.and.
209     >        BA_push_get(mt_int,(np),
210     >                     'indx',indx(2),indx(1))
211      if (.not. value)
212     >  call errquit('Balance_init_a:out of stack memory',0, MA_ERR)
213
214*     **** define nwave2 ****
215      do i=0,np-1
216c        nwave2(i) = 0
217         int_mb(nwave2(1)+i) = 0
218      end do
219c     nwave2(taskid) = nwave
220      int_mb(nwave2(1)+taskid) = nwave
221c     call D3dB_Vector_ISumAll(np,nwave2)
222      call D3dB_Vector_ISumAll(np,int_mb(nwave2(1)))
223
224*     **** get the sorting index ****
225c     call nwave2_sort(np,nwave2,indx)
226      call nwave2_sort(np,int_mb(nwave2(1)),int_mb(indx(1)))
227
228*     ***** get the average ****
229      ave = 0
230      do i=0,np-1
231c        ave = ave + nwave2(i)
232         ave = ave + int_mb(nwave2(1)+i)
233      end do
234      ave = ave/np
235
236*     ***** get below ***
237      below = -1
238      do while (int_mb(nwave2(1) + int_mb(indx(1)+below+1)).lt.ave)
239        below = below + 1
240      end do
241
242*     ***** get above ***
243      above = np
244      do while (int_mb(nwave2(1) + int_mb(indx(1)+above-1)).gt.ave)
245        above = above - 1
246      end do
247
248
249      npacket  = 0
250      receiver = .false.
251      sender   = .false.
252
253      if (np.gt.1) then
254        i = 0
255        j = np-1
256        done = .false.
257        if (i .gt. below) done = .true.
258        if (j .lt. above) done = .true.
259        do while (.not. done)
260           short = ave - int_mb(nwave2(1)+int_mb(indx(1)+i))
261           long =  int_mb(nwave2(1)+int_mb(indx(1)+j)) - ave
262
263           if (taskid.eq.int_mb(indx(1)+i)) then
264              npacket = npacket + 1
265              proc_from(npacket) = int_mb(indx(1)+j)
266              receiver = .true.
267           end if
268
269           if (taskid.eq.int_mb(indx(1)+j)) then
270              npacket = npacket + 1
271              proc_to(npacket) = int_mb(indx(1)+i)
272              sender   = .true.
273           end if
274
275
276           if (short.eq.long) then
277
278             if (taskid.eq.int_mb(indx(1)+i)) then
279                packet_size(npacket) = short
280                indx_start(npacket)  =
281     >              int_mb(nwave2(1)+int_mb(indx(1)+i)) + 1
282             end if
283
284             if (taskid.eq.int_mb(indx(1)+j)) then
285                packet_size(npacket) = long
286                indx_start(npacket) =
287     >            int_mb(nwave2(1)+int_mb(indx(1)+j)) - long + 1
288             end if
289
290             int_mb(nwave2(1)+int_mb(indx(1)+i)) =
291     >         int_mb(nwave2(1)+int_mb(indx(1)+i)) + short
292             int_mb(nwave2(1)+int_mb(indx(1)+j)) =
293     >         int_mb(nwave2(1)+int_mb(indx(1)+j)) - long
294             i = i + 1
295             j = j - 1
296
297
298           else if (short.lt.long) then
299
300             if (taskid.eq.int_mb(indx(1)+i)) then
301               packet_size(npacket) = short
302               indx_start(npacket) =
303     >            int_mb(nwave2(1)+int_mb(indx(1)+i)) + 1
304             end if
305
306             if (taskid.eq.int_mb(indx(1)+j)) then
307               packet_size(npacket) = short
308               indx_start(npacket) =
309     >            int_mb(nwave2(1)+int_mb(indx(1)+j)) - short + 1
310             end if
311
312             int_mb(nwave2(1)+int_mb(indx(1)+i)) =
313     >         int_mb(nwave2(1)+int_mb(indx(1)+i)) + short
314             int_mb(nwave2(1)+int_mb(indx(1)+j)) =
315     >         int_mb(nwave2(1)+int_mb(indx(1)+j)) - short
316             i = i + 1
317
318
319           else if (short.gt.long) then
320             if (taskid.eq.int_mb(indx(1)+i)) then
321               packet_size(npacket) = long
322               indx_start(npacket) =
323     >           int_mb(nwave2(1)+int_mb(indx(1)+i)) + 1
324             end if
325
326             if (taskid.eq.int_mb(indx(1)+j)) then
327               packet_size(npacket) = long
328               indx_start(npacket) =
329     >           int_mb(nwave2(1)+int_mb(indx(1)+j)) - long + 1
330             end if
331
332             int_mb(nwave2(1)+int_mb(indx(1)+i)) =
333     >          int_mb(nwave2(1)+int_mb(indx(1)+i)) + long
334             int_mb(nwave2(1)+int_mb(indx(1)+j)) =
335     >          int_mb(nwave2(1)+int_mb(indx(1)+j)) - long
336             j = j - 1
337
338           end if
339
340           if (i .gt. below) done = .true.
341           if (j .lt. above) done = .true.
342
343        end do
344
345      end if
346
347      nwave_out = int_mb(nwave2(1)+taskid)
348
349      value =           BA_pop_stack(indx(2))
350      value = value.and.BA_pop_stack(nwave2(2))
351      if (.not. value)
352     > call errquit('Balance_init_a:error freeing stack memory',0,
353     &       MA_ERR)
354
355
356      return
357      end
358
359      subroutine nwave2_sort(n,f,indx)
360      integer n
361      integer f(0:(n-1))
362      integer indx(0:(n-1))
363
364      integer i,j,idum
365      do i=0,n-1
366        indx(i) = i
367      end do
368      do i=0,(n-2)
369      do j=i+1,(n-1)
370        if (f(indx(j)).lt.f(indx(i))) then
371              idum    = indx(i)
372              indx(i) = indx(j)
373              indx(j) = idum
374           end if
375      end do
376      end do
377
378      return
379      end
380
381*     ************************************
382*     *                                  *
383*     *         Balance_c_balance        *
384*     *                                  *
385*     ************************************
386
387      subroutine Balance_c_balance(nb,A)
388      implicit none
389      integer nb
390      complex*16 A(*)
391
392#include "bafdecls.fh"
393#include "tcgmsg.fh"
394#include "msgtypesf.h"
395#include "balance_common.fh"
396
397*     **** local variables ****
398      integer  rcv_len,rcv_proc
399      integer j
400      integer pto,pfrom,msglen,indx
401
402*     **** external functions ****
403      integer  Parallel2d_convert_taskid_i
404      external Parallel2d_convert_taskid_i
405
406!$OMP MASTER
407      if (log_mb(sender_list(1)+nb)) then
408         do j=1,int_mb(npacket_list(1)+nb)
409            pto    = int_mb(int_mb(proc_to_list(1)    +2*nb)+j-1)
410            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
411            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
412c            send data....
413            if (msglen.gt.0) then
414               call SND(9+MSGDBL,
415     >                  A(indx),
416     >                  mdtob(2*msglen),
417     >                  Parallel2d_convert_taskid_i(pto),
418     >                  1)
419            end if
420
421
422         end do
423      end if
424
425      if (log_mb(receiver_list(1)+nb)) then
426         do j=1,int_mb(npacket_list(1)+nb)
427            pfrom  = int_mb(int_mb(proc_from_list(1)  +2*nb)+j-1)
428            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
429            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
430c            recieve data....
431            if (msglen.gt.0) then
432               call RCV(9+MSGDBL,
433     >                  A(indx),
434     >                  mdtob(2*msglen),rcv_len,
435     >                  Parallel2d_convert_taskid_i(pfrom),
436     >                  rcv_proc,1)
437            end if
438
439         end do
440      end if
441!$OMP END MASTER
442!$OMP BARRIER
443
444      return
445      end
446
447
448*     ************************************
449*     *                                  *
450*     *         Balance_t_balance        *
451*     *                                  *
452*     ************************************
453
454      subroutine Balance_t_balance(nb,A)
455      implicit none
456      integer nb
457      real*8 A(*)
458
459#include "bafdecls.fh"
460#include "tcgmsg.fh"
461#include "msgtypesf.h"
462#include "balance_common.fh"
463
464*     **** local variables ****
465      integer  rcv_len,rcv_proc
466      integer j
467      integer pto,pfrom,msglen,indx
468
469*     **** external functions ****
470      integer  Parallel2d_convert_taskid_i
471      external Parallel2d_convert_taskid_i
472
473!$OMP MASTER
474      if (log_mb(sender_list(1)+nb)) then
475         do j=1,int_mb(npacket_list(1)+nb)
476            pto    = int_mb(int_mb(proc_to_list(1)    +2*nb)+j-1)
477            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
478            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
479c            send data....
480            if (msglen.gt.0) then
481               call SND(9+MSGDBL,
482     >                  A(indx),
483     >                  mdtob(msglen),
484     >                  Parallel2d_convert_taskid_i(pto),
485     >                  1)
486            end if
487
488
489         end do
490      end if
491
492      if (log_mb(receiver_list(1)+nb)) then
493         do j=1,int_mb(npacket_list(1)+nb)
494            pfrom  = int_mb(int_mb(proc_from_list(1)  +2*nb)+j-1)
495            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
496            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
497c            recieve data....
498            if (msglen.gt.0) then
499               call RCV(9+MSGDBL,
500     >                  A(indx),
501     >                  mdtob(msglen),rcv_len,
502     >                  Parallel2d_convert_taskid_i(pfrom),
503     >                  rcv_proc,1)
504            end if
505
506
507         end do
508      end if
509!$OMP END MASTER
510!$OMP BARRIER
511
512      return
513      end
514
515
516*     ************************************
517*     *                                  *
518*     *         Balance_c_unbalance      *
519*     *                                  *
520*     ************************************
521
522      subroutine Balance_c_unbalance(nb,A)
523      implicit none
524      integer nb
525      complex*16 A(*)
526
527#include "bafdecls.fh"
528#include "tcgmsg.fh"
529#include "msgtypesf.h"
530#include "balance_common.fh"
531
532*     **** local variables ****
533      integer  rcv_len,rcv_proc
534      integer j
535      integer pto,pfrom,msglen,indx
536
537*     **** external functions ****
538      integer  Parallel2d_convert_taskid_i
539      external Parallel2d_convert_taskid_i
540
541!$OMP MASTER
542      if (log_mb(sender_list(1)+nb)) then
543         do j=1,int_mb(npacket_list(1)+nb)
544            pfrom  = int_mb(int_mb(proc_to_list(1)    +2*nb)+j-1)
545            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
546            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
547c            recieve data....
548            if (msglen.gt.0) then
549               call RCV(9+MSGDBL,
550     >                  A(indx),
551     >                  mdtob(2*msglen),rcv_len,
552     >                  Parallel2d_convert_taskid_i(pfrom),
553     >                  rcv_proc,1)
554            end if
555
556         end do
557      end if
558
559      if (log_mb(receiver_list(1)+nb)) then
560         do j=1,int_mb(npacket_list(1)+nb)
561            pto    = int_mb(int_mb(proc_from_list(1)  +2*nb)+j-1)
562            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
563            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
564c            send data....
565            if (msglen.gt.0) then
566               call SND(9+MSGDBL,
567     >                  A(indx),
568     >                  mdtob(2*msglen),
569     >                  Parallel2d_convert_taskid_i(pto),1)
570            end if
571
572         end do
573      end if
574!$OMP END MASTER
575!$OMP BARRIER
576
577      return
578      end
579
580
581*     ************************************
582*     *                                  *
583*     *         Balance_i_balance        *
584*     *                                  *
585*     ************************************
586
587      subroutine Balance_i_balance(nb,A)
588      implicit none
589      integer nb
590      integer A(*)
591
592#include "bafdecls.fh"
593#include "tcgmsg.fh"
594#include "msgtypesf.h"
595#include "balance_common.fh"
596
597*     **** local variables ****
598      integer  rcv_len,rcv_proc
599      integer j
600      integer pto,pfrom,msglen,indx
601
602*     **** external functions ****
603      integer  Parallel2d_convert_taskid_i
604      external Parallel2d_convert_taskid_i
605
606      if (log_mb(sender_list(1)+nb)) then
607         do j=1,int_mb(npacket_list(1)+nb)
608            pto    = int_mb(int_mb(proc_to_list(1)    +2*nb)+j-1)
609            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
610            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
611c            send data....
612            if (msglen.gt.0) then
613               call SND(9+MSGINT,
614     >                  A(indx),
615     >                  mitob(msglen),
616     >                  Parallel2d_convert_taskid_i(pto),1)
617            end if
618
619
620         end do
621      end if
622
623      if (log_mb(receiver_list(1)+nb)) then
624         do j=1,int_mb(npacket_list(1)+nb)
625            pfrom  = int_mb(int_mb(proc_from_list(1)  +2*nb)+j-1)
626            msglen = int_mb(int_mb(packet_size_list(1)+2*nb)+j-1)
627            indx   = int_mb(int_mb(indx_start_list(1) +2*nb)+j-1)
628c            recieve data....
629            if (msglen.gt.0) then
630               call RCV(9+MSGINT,
631     >                  A(indx),
632     >                  mitob(msglen),rcv_len,
633     >                  Parallel2d_convert_taskid_i(pfrom),
634     >                  rcv_proc,1)
635            end if
636
637         end do
638      end if
639
640      return
641      end
642
643