1*     PART 6
2*
3*     File mpi.f
4*     -------------
5*
6*     Subroutines for parallel execution (MPI)
7*     Uses double precision in the data transfer
8*
9*    1. Parallel initialization
10*    --------------------------
11C    This subroutine initialize MPI interface and get two numbers:
12C      NUMTASK  - total number of nodes available
13C      TASKID   - current node  (TASKID=0,1,...,NUMTASK-1)
14C
15C======= PARAINI ===================================================
16C
17      subroutine PARAINI(numtask,taskid)
18      include "mpif.h"
19      integer numtask,taskid
20      call MPI_INIT( ierr )
21      call MPI_COMM_RANK( MPI_COMM_WORLD, taskid, ierr )
22      call MPI_COMM_SIZE( MPI_COMM_WORLD, numtask, ierr )
23      print *, "Process ", taskid, " of ", numtask, " is alive"
24      return
25      end
26*
27*    2. Broadcast atom positions to all nodes
28*    ----------------------------------------
29C    This subroutine broadcast positions of atoms to all the nodes
30C    Each node operate with atoms from NAB(TASKID) to NAE(TASKID),
31C    but for force evaluation each node should know exact positions
32C    of all the atoms.
33C    The are two regimes of this subroutine defined by parameter IND:
34C      IND=1  - atoms distributed non-equally between the nodes
35C               (case of constrained dunamics)
36C      IND=2  - each node (except the last one) has equal number of atoms
37C               This is used in DOUBLE subroutine
38C
39C============ SINHR ===================================
40C
41	subroutine SINHR(IND)
42	include "prcm.h"
43C  Making buffers real*4 will not affect precision noticaebly, but
44C  save communication time greatly
45	real*8 BUFF(NBUFF),BUF2(NBUFF)
46	include "mpif.h"
47        timen0	= cputime(0.d0)
48C   This normally not necessary
49*      call MPI_BARRIER(MPI_COMM_WORLD,IERR)
50*
51*   2.1 Case of non-equal number of atoms on processors
52*   ---------------------------------------------------
53	if(IND.eq.1)then   !    (used in constrain dynamics)
54*   2.1.1 Put coordinates into buffer
55C   This is number atoms on the processor
56          NPC=NAP(TASKID)
57	  NPC3=NPC*3
58	  do K=1,NPC
59	    I=NABS(TASKID)+K
60	    BUFF(K)=SX(I)
61	    BUFF(K+NPC)=SY(I)
62	    BUFF(K+2*NPC)=SZ(I)
63	  end do
64*    2.1.2 Communicate
65C    This subroutine gather coordinates from all the nodes
66C    and then broadcast them
67          call MPI_ALLGATHERV(BUFF,NPC3,MPI_DOUBLE_PRECISION,BUF2,NAP3,
68     +NABS3,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,ierr)
69C   This normally not necessary
70*         call MPI_BARRIER(MPI_COMM_WORLD,IERR)
71*    2.1.3 Reconstruct coordinates from the buffer
72	  do K=0,NUMTASK-1
73	      NPC=NAP(K)
74              do J=NAB(K),NAE(K)
75	        I=J+2*NABS(K)
76	        SX(J)=BUF2(I)
77	        SY(J)=BUF2(I+NPC)
78	        SZ(J)=BUF2(I+2*NPC)
79	      end do
80	  end do
81	end if
82*
83*    2.2  Case of equal number of atoms on processors
84*    ------------------------------------------------
85	if(IND.eq.2)then
86*  in DOUBLE - each short time step
87	  NAPC3=NAPC*3
88	  ISH=NABS(TASKID)
89	  if(NBUFF.le.NAPC3)then
90	    write(*,*)' Increase NBUFF in prcm.h to ',NAPC3
91	    call FINAL
92	  end if
93*    2.2.1 Write in buffer
94	  do I=1,NAPC
95	    BUFF(I)=SX(I+ISH)
96	    BUFF(I+NAPC)=SY(I+ISH)
97	    BUFF(I+2*NAPC)=SZ(I+ISH)
98	  END DO
99*    2.2.2 Communicate
100        call MPI_ALLGATHER(BUFF,NAPC3,MPI_DOUBLE_PRECISION,BUF2,NAPC3,
101     +  MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,ierr)
102C   This normally not necessary
103*        call MPI_BARRIER(MPI_COMM_WORLD,IERR)
104*    2.2.3 Restore coordinates from the buffer
105	  do K=0,NUMTASK-1
106            do J=NAB(K),NAE(K)
107	      I=J+2*NABS(K)
108	      SX(J)=BUF2(I)
109	      SY(J)=BUF2(I+NAPC)
110	      SZ(J)=BUF2(I+2*NAPC)
111	    end do
112	end do
113      end if    ! IF(IAL...
114C
115      timen	= timen+cputime(timen0)
116      return
117      end
118*
119*    3. Communicate atom velocities to the master node
120*    ----------------------------------------
121C    This subroutine communicate velocities of atoms to the master node
122C    Each node operate with atoms from NAB(TASKID) to NAE(TASKID),
123C    but for evaluation of some averages the master node should know
124C    velocities of all the atoms.
125C    The are two regimes of this subroutine defined by parameter IND:
126C      IND=1  - atoms distributed non-equally between the nodes
127C               (case of constrained dunamics)
128C      IND=2  - each node (except the last one) has equal number of atoms
129C               This is used in DOUBLE subroutine
130C
131C============ SINHV ===================================
132C
133	subroutine SINHV(IND)
134	include "prcm.h"
135	include "mpif.h"
136	real*8 BUFF(NBUFF),BUF2(NBUFF)
137        timen0	= cputime(0.d0)
138	ISH=NAB(TASKID)
139*
140*   3.1 Case of non-equal number of atoms on processors
141*   ---------------------------------------------------
142	if(IND.eq.1)then
143          NPC=NAP(TASKID)
144	  NPC3=NPC*3
145	  do K=1,NPC
146	    I=NABS(TASKID)+K
147	    BUFF(K)=VX(I)
148	    BUFF(K+NPC)=VY(I)
149	    BUFF(K+2*NPC)=VZ(I)
150	  end do
151*    3.1.2 Communicate
152C    This subroutine gather velocities from all the nodes
153C    and then broadcast them
154          call MPI_GATHERV(BUFF,NPC3,MPI_DOUBLE_PRECISION,BUF2,NAP3,
155     +NABS3,MPI_DOUBLE_PRECISION,MAST,MPI_COMM_WORLD,ie)
156          if(TASKID.eq.MAST)then
157	    do K=0,NUMTASK-1
158              if(K.ne.MAST)then
159	      NPC=NAP(K)
160              do J=NAB(K),NAE(K)
161	        I=J+2*NABS(K)
162	        VX(J)=BUF2(I)
163	        VY(J)=BUF2(I+NPC)
164	        VZ(J)=BUF2(I+2*NPC)
165	      end do
166              end if
167            end do
168	  end if
169*   3.2 Case of equal number of atoms on processors
170*   -----------------------------------------------
171	else   ! IND=2
172	  NAPC3=NAPC*3
173	  ISH=NABS(TASKID)
174	  if(NBUFF.le.NAPC3)then
175	    write(*,*)' Increase NBUFF in prcm.h to ',NAPC3
176	    call FINAL
177	  end if
178*    3.2.1 Write in buffer
179	  do I=1,NAPC
180	    BUFF(I)=VX(I+ISH)
181	    BUFF(I+NAPC)=VY(I+ISH)
182	    BUFF(I+2*NAPC)=VZ(I+ISH)
183	  END DO
184*    3.2.2 Communicate
185          call MPI_GATHER(BUFF,NAPC3,MPI_DOUBLE_PRECISION,BUF2,NAPC3,
186     +    MPI_DOUBLE_PRECISION,MAST,MPI_COMM_WORLD,ierr)
187*    3.2.3 Restore coordinates from the buffer
188          if(TASKID.eq.MAST)then
189	    do K=0,NUMTASK-1
190              if(K.ne.MAST)then
191              do J=NAB(K),NAE(K)
192	        I=J+2*NABS(K)
193	        VX(J)=BUF2(I)
194	        VY(J)=BUF2(I+NAPC)
195	        VZ(J)=BUF2(I+2*NAPC)
196	      end do
197              end if
198	    end do
199          end if
200	end if
201*
202*   3.3 Synchronize all the distancies and thermostat parameters
203*   ------------------------------------------------------------
204C   In principle, each node solve the same set of Nose-Hoover equations
205C   with the same parameters, and should synchroniously change them.
206C   This is only to ensure that they are the same on all the nodes
207	if(TASKID.eq.MAST)then
208	  BUFF(1)=SC
209	  BUFF(2)=BOXL
210	  BUFF(3)=BOYL
211	  BUFF(4)=BOZL
212	  BUFF(5)=SCL
213	  BUFF(6)=SCLX
214	  BUFF(7)=SCLY
215	  BUFF(8)=SCLZ
216	end if
217	call MPI_BCAST(BUFF,8,MPI_DOUBLE_PRECISION,MAST,
218     +MPI_COMM_WORLD,ie)
219	SC=BUFF(1)
220	BOXL=BUFF(2)
221	BOYL=BUFF(3)
222	BOZL=BUFF(4)
223	SCL=BUFF(5)
224	SCLX=BUFF(6)
225	SCLY=BUFF(7)
226	SCLZ=BUFF(8)
227	call RECLEN(ODIN,ODIN,ODIN)
228        timen	= timen+cputime(timen0)
229      return
230      end
231*
232*    4.  Sum up fast forces
233*    ----------------------
234C   This procedure sum up fast forces (HX) acting on each particle
235C   The result is send on "home" node for each particle
236C   Contributions to energies and virials from all the nodes are
237C   also summed up
238C
239C=================== CM_ADDF ======================================
240C
241	subroutine CM_ADDF(LAVER)
242	include "prcm.h"
243	include "mpif.h"
244	real*8 BUFF(NBUFF),BUF2(NBUFF)
245	logical LAVER
246        timen0	= cputime(0.d0)
247*
248*   4.1 Sum up energies and virials
249*
250C  This ie done only at the last short time step, because all these
251C  variables are not used during intermidiate time steps
252	if(LAVER)then
253	  BUFF(1)=PE2             !  LJ energy
254	  BUFF(2)=PELS2           !  electrostatic energy
255	  BUFF(3)=PINT            !  bond/angle intramolec. energy
256	  BUFF(4)=VIR2            !  LJ virial
257	  BUFF(5)=WIRSS           !  correction to molec. virial
258	  BUFF(6)=VIRB            !  bond virial
259	  BUFF(7)=VIRFX           !  intramolec. virial projections
260	  BUFF(8)=VIRFY
261	  BUFF(9)=VIRFZ
262	  ICNT=10+MOLINT
263          if(LSCFL)then
264            BUFF(10)=1.
265          else
266            BUFF(10)=0.
267          end if
268	  do INT=1,MOLINT
269	    BUFF(10+INT)=POTE1(INT)
270	    BUFF(ICNT+INT)=POTL1(INT)
271	  end do
272	  ICNT=ICNT+MOLINT
273	  ICNT2=ICNT+NTYPES
274	  do I=1,NTYPES
275	    BUFF(ICNT+I)=PES141(I)
276	    BUFF(ICNT2+I)=PSR141(I)
277	  end do
278	  ICNT=ICNT2+NTYPES
279          if(ISTAV.gt.0)then
280	  if(NRBB.gt.0)then
281	    ICNT2=ICNT+NRBB
282	    do I=1,NRBB
283	      BUFF(ICNT+I)=BR(I)
284	      BUFF(ICNT2+I)=BE(I)
285	    end do
286	    ICNT=ICNT2+NRBB
287	  end if
288	  if(NRAA.gt.0)then
289	    ICNT2=ICNT+NRAA
290	    do I=1,NRAA
291	      BUFF(ICNT+I)=AR(I)
292	      BUFF(ICNT2+I)=AE(I)
293	    end do
294	    ICNT=ICNT2+NRAA
295	  end if
296	  if(NRTT.gt.0)then
297	    ICNT2=ICNT+NRTT
298	    do I=1,NRTT
299	      BUFF(ICNT+I)=TR(I)
300	      BUFF(ICNT2+I)=TE(I)
301	    end do
302	    ICNT=ICNT2+NRTT
303	  end if
304          end if
305	  ICNTS=ICNT
306	  if(ICNT.gt.NBUFF)then
307	    write(*,*) 'increase NBUFF to ',ICNT
308	    call FINAL
309	  end if
310          if(LPIMD)then
311            do I=1,ICNT
312              BUF2(I)=BUFF(I)
313            end do
314          else
315            call MPI_REDUCE(BUFF,BUF2,ICNT,MPI_DOUBLE_PRECISION,MPI_SUM,
316     $      MAST,MPI_COMM_WORLD,ierr)
317          end if
318	    PE2=BUF2(1)
319	    PELS2 = BUF2(2)
320	    PINT = BUF2(3)
321            VIR2= BUF2(4)
322	    WIRSS=BUF2(5)
323	    VIRB=BUF2(6)
324	    VIRFX=BUF2(7)
325	    VIRFY=BUF2(8)
326	    VIRFZ=BUF2(9)
327            if(BUFF(10).gt.0.5)then
328              LSCFL=.true.
329            else
330              LSCFL=.false.
331            end if
332	    ICNT2=10+MOLINT
333	    do INT=1,MOLINT
334	      POTES(INT)=POTES(INT)+BUF2(10+INT)
335	      POTLJ(INT)=POTLJ(INT)+BUF2(ICNT2+INT)
336	    end do
337	    ICNT=ICNT2+MOLINT
338	    ICNT2=ICNT+NTYPES
339	    do I=1,NTYPES
340	      PES14(I)=PES14(I)+BUF2(ICNT+I)
341	      PSR14(I)=PSR14(I)+BUF2(ICNT2+I)
342	    end do
343	    ICNT=ICNT2+NTYPES
344            if(ISTAV.gt.0)then
345	    if(NRBB.gt.0)then
346	      ICNT2=ICNT+NRBB
347	      do I=1,NRBB
348	        BB(I)=BB(I)+BUF2(ICNT+I)
349	        EB(I)=EB(I)+BUF2(ICNT2+I)
350	      end do
351	      ICNT=ICNT2+NRBB
352	    end if
353	    if(NRAA.gt.0)then
354	      ICNT2=ICNT+NRAA
355	      do I=1,NRAA
356	        AA(I)=AA(I)+BUF2(ICNT+I)
357	        EA(I)=EA(I)+BUF2(ICNT2+I)
358	      end do
359	      ICNT=ICNT2+NRAA
360	    end if
361	    if(NRTT.gt.0)then
362	      ICNT2=ICNT+NRTT
363	      do I=1,NRTT
364	        TT(I)=TT(I)+BUF2(ICNT+I)
365	        ET(I)=ET(I)+BUF2(ICNT2+I)
366	      end do
367	      ICNT=ICNT2+NRTT
368	    end if
369            end if
370	    if(ICNT.ne.ICNTS)write(*,*)' sent ',ICNTS,' received ',ICNT
371        end if !  LAVER
372*
373*   4.2  Sum up forces
374*   ------------------
375        if(.not.LPIMD)then
376 	do J=0,NUMTASK-1
377	  I0=3*NABS(J)
378	  I1=I0+NAP(J)
379	  I2=I1+NAP(J)
380	  do K=1,NAP(J)
381	    I=NABS(J)+K
382	    BUFF(I0+K)=HX(I)
383	    BUFF(I1+K)=HY(I)
384	    BUFF(I2+K)=HZ(I)
385	  END DO
386	end do
387        call MPI_REDUCE_SCATTER(BUFF,BUF2,NAP3,MPI_DOUBLE_PRECISION,
388     +  MPI_SUM,MPI_COMM_WORLD,ierr)
389        do J=NAB(TASKID),NAE(TASKID)
390	  I=J-NABS(TASKID)
391	  HX(J)=BUF2(I)
392	  HY(J)=BUF2(I+NAP(TASKID))
393	  HZ(J)=BUF2(I+2*NAP(TASKID))
394        end do
395      end if ! .not.LPIMD
396      timen	= timen+cputime(timen0)
397      return
398      end
399*
400*    5.  Sum up slow forces
401*    ----------------------
402C   This procedure sum up slow forces (GX) acting on each particle
403C   The result is send on "home" node for each particle
404C   Contributions to energies and virials from all the nodes are
405C   also summed up
406CM
407CM=================== CM_ADDLF ======================================
408CM
409      subroutine CM_ADDLF
410      include "prcm.h"
411      real*8 BUFF(NBUFF),BUF2(NBUFF)
412      include "mpif.h"
413      timen0	= cputime(0.d0)
414*
415*   5.1 Sum up forces
416*   -----------------
417      if(LPIMD)then
418*  only virial computation
419        DO N         = 1,NSTOT
420	  M	= NNUM(N)
421	  WIRS	= WIRS-GX(N)*(SX(N)-X(M))-GY(N)*(SY(N)-Y(M))-
422     +               GZ(N)*(SZ(N)-Z(M))
423        END DO! OF N
424*  normal run
425      else
426        do J=0,NUMTASK-1
427	  I0=3*NABS(J)
428	  I1=I0+NAP(J)
429	  I2=I1+NAP(J)
430C
431C   NAB(I) is the first atom for which node I is responsible
432C   NAE(I) is the last atom for which node I is responsible
433C   NABS(I)=NAB(I)-1
434C   NAP(I)=NAE(I)-NABS(I) is the number of atoms for node I
435C   (these are set in module UNITS, file setup.f )
436C
437	  do K=1,NAP(J)
438	    I=NABS(J)+K
439	    BUFF(I0+K)=GX(I)
440	    BUFF(I1+K)=GY(I)
441	    BUFF(I2+K)=GZ(I)
442	  END DO
443        end do
444        call MPI_REDUCE_SCATTER(BUFF,BUF2,NAP3,MPI_DOUBLE_PRECISION,
445     +  MPI_SUM,MPI_COMM_WORLD,ierr)
446        do J=NAB(TASKID),NAE(TASKID)
447	  I=J-NABS(TASKID)
448	  GX(J)=BUF2(I)
449	  GY(J)=BUF2(I+NAP(TASKID))
450	  GZ(J)=BUF2(I+2*NAP(TASKID))
451	  M	= NNUM(J)
452	  WIRS	= WIRS-GX(J)*(SX(J)-X(M))-GY(J)*(SY(J)-Y(M))-
453     +               GZ(J)*(SZ(J)-Z(M))
454        end do
455      end if
456*
457*   5.2  Sum up other data
458*   ----------------------
459      BUFF(1) = VIRX
460      BUFF(2) = VIRY
461      BUFF(3) = VIRZ
462      BUFF(4) = VIR1
463      BUFF(5) = PE1
464      BUFF(6) = PELS1
465      BUFF(7) = SPE
466      BUFF(8) = WIRS
467      ICNT=8
468      do INT=1,MOLINT
469	BUFF(INT+ICNT)=POTE1(INT)
470	BUFF(INT+MOLINT+ICNT)=POTL1(INT)
471      end do
472      ICNT=ICNT+2*MOLINT
473      ICNT2=ICNT+NTYPES
474      ICNT3=ICNT2+NTYPES
475      do I=1,NTYPES
476	BUFF(I+ICNT)=PES141(I)
477	BUFF(I+ICNT2)=PSR141(I)
478	BUFF(I+ICNT3)=SPEE(I)
479      end do
480      ICNT=ICNT3+NTYPES
481      if(LPIMD)then
482        do I=1,ICNT
483          BUF2(I)=BUFF(I)
484        end do
485      else
486        call MPI_REDUCE(BUFF,BUF2,ICNT,MPI_DOUBLE_PRECISION,MPI_SUM,
487     $     MAST,MPI_COMM_WORLD,ierr)
488      end if
489	VIRX = BUF2(1)
490	VIRY = BUF2(2)
491	VIRZ = BUF2(3)
492	VIR1 = BUF2(4)
493	PE1  = BUF2(5)
494	PELS1= BUF2(6)
495	SPE  = BUF2(7)
496        WIRS = BUF2(8)
497	ICNT=8
498	do INT=1,MOLINT
499	  POTES(INT)=POTES(INT)+BUF2(INT+ICNT)
500	  POTLJ(INT)=POTLJ(INT)+BUF2(INT+MOLINT+ICNT)
501	end do
502	ICNT=ICNT+2*MOLINT
503	ICNT2=ICNT+NTYPES
504	ICNT3=ICNT2+NTYPES
505	do I=1,NTYPES
506	  PES14(I)=PES14(I)+BUF2(I+ICNT)
507	  PSR14(I)=PSR14(I)+BUF2(I+ICNT2)
508	  SELFPE(I)=SELFPE(I)+BUF2(I+ICNT3)
509	end do
510      timen	= timen+cputime(timen0)
511      return
512      end
513*
514*    6.  Sum up all forces
515*    ---------------------
516C   This procedure sum up all forces (FX) acting on each particle
517C   The result is send on "home" node for each particle
518C   Contributions to energies and virials from all the nodes are
519C   also summed up
520CM
521CM=================== CM_ADDLA ======================================
522CM
523      subroutine CM_ADDLA
524      include "prcm.h"
525      real*8 BUFF(NBUFF),BUF2(NBUFF)
526      include "mpif.h"
527      if(LPIMD)return
528      timen0	= cputime(0.d0)
529      do J=0,NUMTASK-1
530	I0=3*NABS(J)
531	I1=I0+NAP(J)
532	I2=I1+NAP(J)
533	do K=1,NAP(J)
534	  I=NABS(J)+K
535	  BUFF(I0+K)=GX(I)+HX(I)
536	  BUFF(I1+K)=GY(I)+HY(I)
537	  BUFF(I2+K)=GZ(I)+HZ(I)
538	  M	= NNUM(I)
539	  WIRS	= WIRS-GX(I)*(SX(I)-X(M))-GY(I)*(SY(I)-Y(M))-
540     +               GZ(I)*(SZ(I)-Z(M))
541	END DO
542      end do
543      call MPI_REDUCE_SCATTER(BUFF,BUF2,NAP3,MPI_DOUBLE_PRECISION,
544     +  MPI_SUM,MPI_COMM_WORLD,ierr)
545      do J=NAB(TASKID),NAE(TASKID)
546	I=J-NABS(TASKID)
547	FX(J)=BUF2(I)
548	FY(J)=BUF2(I+NAP(TASKID))
549	FZ(J)=BUF2(I+2*NAP(TASKID))
550      end do
551      BUFF(1) = VIRX
552      BUFF(2) = VIRY
553      BUFF(3) = VIRZ
554      BUFF(4) = VIR1
555      BUFF(5) = VIR2
556      BUFF(6)= VIRB
557      BUFF(7)= VIRFX
558      BUFF(8)= VIRFY
559      BUFF(9)= VIRFZ
560      BUFF(10)= WIRSS
561      BUFF(11) = VIRA(1)
562      BUFF(12) = VIRA(2)
563      BUFF(13) = VIRA(3)
564      BUFF(14) = PE1
565      BUFF(15) = PELS1
566      BUFF(16) = PE2
567      BUFF(17) = PELS2
568      BUFF(18) = PINT
569      BUFF(19) = SPE
570      BUFF(20) = WIRS
571      if(LSCFL)then
572        BUFF(21)=1.
573      else
574        BUFF(21)=0.
575      end if
576      ICNT=21
577      do INT=1,MOLINT
578	BUFF(INT+ICNT)=POTE1(INT)
579	BUFF(INT+MOLINT+ICNT)=POTL1(INT)
580      end do
581      ICNT=ICNT+2*MOLINT
582      ICNT2=ICNT+NTYPES
583      ICNT3=ICNT2+NTYPES
584      do I=1,NTYPES
585	BUFF(I+ICNT)=PES141(I)
586	BUFF(I+ICNT2)=PSR141(I)
587	BUFF(I+ICNT3)=SPEE(I)
588      end do
589      ICNT=ICNT3+NTYPES
590      if(ISTAV.gt.0)then
591      if(NRBB.gt.0)then
592	ICNT2=ICNT+NRBB
593	do I=1,NRBB
594	  BUFF(ICNT+I)=BR(I)
595	  BUFF(ICNT2+I)=BE(I)
596	end do
597	ICNT=ICNT2+NRBB
598      end if
599      if(NRAA.gt.0)then
600	    ICNT2=ICNT+NRAA
601	    do I=1,NRAA
602	      BUFF(ICNT+I)=AR(I)
603	      BUFF(ICNT2+I)=AE(I)
604	    end do
605	    ICNT=ICNT2+NRAA
606      end if
607      if(NRTT.gt.0)then
608	    ICNT2=ICNT+NRTT
609	    do I=1,NRTT
610	      BUFF(ICNT+I)=TR(I)
611	      BUFF(ICNT2+I)=TE(I)
612	    end do
613	    ICNT=ICNT2+NRTT
614      end if
615      end if
616      ICNTS=ICNT
617      if(ICNT.gt.NBUFF)then
618	    write(*,*) 'increase NBUFF to ',ICNT
619	    call FINAL
620      end if
621      call MPI_REDUCE(BUFF,BUF2,ICNT,MPI_DOUBLE_PRECISION,MPI_SUM,
622     $     MAST,MPI_COMM_WORLD,ierr)
623      if(TASKID.eq.MAST)then
624	VIRX = BUF2(1)
625	VIRY = BUF2(2)
626	VIRZ = BUF2(3)
627	VIR1 = BUF2(4)
628	VIR2 = BUF2(5)
629	VIRB = BUF2(6)
630	VIRFX= BUF2(7)
631	VIRFY= BUF2(8)
632	VIRFZ= BUF2(9)
633        WIRSS= BUF2(10)
634        VIRA(1)=BUF2(11)
635        VIRA(2)=BUF2(12)
636        VIRA(3)=BUF2(13)
637	PE1  = BUF2(14)
638	PELS1= BUF2(15)
639	PE2  = BUF2(16)
640	PELS2= BUF2(17)
641	PINT = BUF2(18)
642	SPE  = BUF2(19)
643        WIRS = BUF2(20)
644        if(BUFF(21).gt.0.5)then
645          LSCFL=.true.
646        else
647          LSCFL=.false.
648        end if
649	ICNT=21
650	do INT=1,MOLINT
651	  POTES(INT)=POTES(INT)+BUF2(INT+ICNT)
652	  POTLJ(INT)=POTLJ(INT)+BUF2(INT+MOLINT+ICNT)
653	end do
654	ICNT=ICNT+2*MOLINT
655	ICNT2=ICNT+NTYPES
656	ICNT3=ICNT2+NTYPES
657	do I=1,NTYPES
658	  PES14(I)=PES14(I)+BUF2(I+ICNT)
659	  PSR14(I)=PSR14(I)+BUF2(I+ICNT2)
660	  SELFPE(I)=SELFPE(I)+BUF2(I+ICNT3)
661	end do
662        ICNT=ICNT3+NTYPES
663        if(ISTAV.gt.0)then
664	if(NRBB.gt.0)then
665	    ICNT2=ICNT+NRBB
666	    do I=1,NRBB
667	      BB(I)=BB(I)+BUF2(ICNT+I)
668	      EB(I)=EB(I)+BUF2(ICNT2+I)
669	    end do
670	    ICNT=ICNT2+NRBB
671	end if
672	if(NRAA.gt.0)then
673	    ICNT2=ICNT+NRAA
674	    do I=1,NRAA
675	      AA(I)=AA(I)+BUF2(ICNT+I)
676	      EA(I)=EA(I)+BUF2(ICNT2+I)
677	    end do
678	    ICNT=ICNT2+NRAA
679	end if
680	if(NRTT.gt.0)then
681	    ICNT2=ICNT+NRTT
682	    do I=1,NRTT
683	      TT(I)=TT(I)+BUF2(ICNT+I)
684	      ET(I)=ET(I)+BUF2(ICNT2+I)
685	    end do
686	    ICNT=ICNT2+NRTT
687	end if
688        end if
689        if(ICNT.ne.ICNTS)write(*,*)' sent ',ICNTS,' received ',ICNT
690      end if
691      timen	= timen+cputime(timen0)
692      return
693      end
694*
695*===================== GATHER =======================================
696*
697	subroutine GATHER(NRNOD)
698	include "prcm.h"
699	include "mpif.h"
700	integer AUX(NBDMAX*NTOT),NRNOD(NTPS)
701        if(LPIMD)return
702	do ITYP=1,NTYPES
703	  IABEG=ISADDR(ITYP)+1
704	  IAEND=ISADDR(ITYP+1)
705	  NRN  =NRNOD(ITYP)
706*  prepare to send
707	  if(NRN.eq.TASKID)then
708	    J=0
709	    do I=IABEG,IAEND
710	      J=J+1
711	      AUX(J)=NNBB(I)
712	      do INB=1,NNBB(I)
713	        J=J+1
714	        AUX(J)=INBB(I,INB)
715	      end do
716	    end do
717	    JBUF=J
718	  end if
719*  transfer buffer size
720        call MPI_BCAST(JBUF,1,MPI_INTEGER,NRN,MPI_COMM_WORLD,ie)
721*  transfer data
722	  call MPI_BCAST(AUX,JBUF,MPI_INTEGER,NRN,MPI_COMM_WORLD,ie)
723*  unpack data
724	  if(NRN.ne.TASKID)then
725	    J=0
726	    do I=IABEG,IAEND
727	      J=J+1
728	      NNBB(I)=AUX(J)
729	      do INB=1,NNBB(I)
730	        J=J+1
731	        INBB(I,INB)=AUX(J)
732	      end do
733	    end do
734	  end if
735	  if(J.ne.JBUF)write(*,*)' in GATHER: send ',JBUF,' received ',J,
736     +' for type ',ITYP,' node ',TASKID
737      end do
738	return
739	end
740*
741*============================= ALLSUM ==================================
742*
743	subroutine ALLSUM(A,B,N)
744	real*8 A(N),B(N)
745	include "mpif.h"
746	  call MPI_ALLREDUCE(A,B,N,MPI_DOUBLE_PRECISION,MPI_SUM,
747     $     MPI_COMM_WORLD,ierr)
748	return
749	end
750*
751*============================ SUMMA ===============================
752*
753      subroutine SUMMA(A,B,N,MAST)
754	real*8 A(N),B(N)
755	include "mpif.h"
756      call MPI_REDUCE(A,B,N,MPI_DOUBLE_PRECISION,MPI_SUM,
757     $     MAST,MPI_COMM_WORLD,ierr)
758	return
759	end
760*
761*============================ BCAST ===============================
762*
763      subroutine BCAST(A,N,MAST)
764      real*8 A(N)
765      include "mpif.h"
766      call MPI_BCAST(A,N,MPI_DOUBLE_PRECISION,MAST,MPI_COMM_WORLD,ie)
767      return
768      end
769*
770*============================ BICAST ===============================
771*
772      subroutine BICAST(I,MAST)
773      include "mpif.h"
774      call MPI_BCAST(I,1,MPI_INTEGER,MAST,MPI_COMM_WORLD,ie)
775      return
776      end
777*
778*========================== BARRIER ================================
779*
780      subroutine BARRIER
781      include "mpif.h"
782      call MPI_BARRIER(MPI_COMM_WORLD,IERR)
783      return
784      end
785*
786*============================= FINAL ========================
787*
788	subroutine FINAL
789	include "mpif.h"
790        call MPI_BARRIER(MPI_COMM_WORLD,IERR)
791	call MPI_FINALIZE(ie)
792C        write(*,*)' Program finalized'
793        close(6)
794        close(55)
795	stop
796	end
797*
798*============================= ABORT ========================
799*
800	subroutine ABORT
801	include "mpif.h"
802        call MPI_ABORT(MPI_COMM_WORLD,IERR)
803	call MPI_FINALIZE(ie)
804        write(*,*)' Program aborted'
805        close(6)
806	stop 'MPI finalized'
807	end
808