1#define NBLOCKS 2
2*
3* $Id$
4*
5
6*     ***********************************************************
7*     *								*
8*     *   		   C3dB library				*
9*     *		(NWChem implemenation, version 0.1)	        *
10*     *								*
11*     *   Author - Eric Bylaska					*
12*     *   date   - 11/16/01					*
13*     *								*
14*     ***********************************************************
15*   The C3dB (full complex distributed three-dimensional block) library
16*is to be used for handling three kinds of data structures.  The first
17* data structure, denoted by "r", is a double precision array of
18* length (nx)*ny*nz.  The second data structure, denoted by "c", is
19* a double complex array of length of (nx)*ny*nz.
20*
21*   The two data structures are distributed across threads, p, in
22* the k (i.e. nz) dimension using a cyclic decomposition.  So that
23* a "r" array A is defined as double precision A(nx,ny,nq) on
24* each thread.
25*
26*   Where
27*       np = number of threads
28*       nq = ceil(nz/np).
29*       0 <= p < np
30*       1 <= q <= nq
31*       1 <= k <= nz
32*
33*   The mapping of k -> q is defined as:
34*
35*       k = ((q-1)*np + p) + 1
36*       q = ((k-1) - p)/np + 1
37*       p = (k-1) mod np
38*
39*  Libraries used: mpi, blas, fftpack, and compressed_io
40*
41
42*  common blocks used in this library:
43*
44*       integer nq,nx,ny,nz
45*   common  / C3dB / nq,nx,ny,nz
46*
47*   integer q_map(NFFT3),p_map(NFFT3),k_map(NFFT3)
48*   common /C3dB_mapping / q_map,p_map,k_map
49*
50*     integer iq_to_i1((NFFT1)*NFFT2*NSLABS)
51*     integer iq_to_i2((NFFT1)*NFFT2*NSLABS)
52*     integer i1_start(NPROCS+1)
53*     integer i2_start(NPROCS+1)
54*     common / c_trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
55
56
57*     ***********************************
58*     *					*
59*     *	       Mapping_Init_C3dB	*
60*     *					*
61*     ***********************************
62
63      subroutine Mapping_Init_C3dB(nb)
64      implicit none
65      integer nb
66
67#include "bafdecls.fh"
68#include "errquit.fh"
69#include "C3dB.fh"
70
71
72      integer k,q,p,j
73*     integer kn
74      integer taskid,np
75      logical value
76
77
78      call Parallel3d_np_i(np)
79      call Parallel3d_taskid_i(taskid)
80
81
82*     **************************
83*     ****** Slab mapping ******
84*     **************************
85      if (mapping.eq.1) then
86
87
88*     **** allocate q_map,p_map,k_map
89      value = BA_alloc_get(mt_int,nz(nb),'q_map',q_map(2,nb),
90     >                                       q_map(1,nb))
91      value = value.and.BA_alloc_get(mt_int,nz(nb),'p_map',p_map(2,nb),
92     >                                       p_map(1,nb))
93      value = value.and.BA_alloc_get(mt_int,nz(nb),'k_map',k_map(2,nb),
94     >                                       k_map(1,nb))
95      if (.not. value)
96     > call errquit('Mapping_init:out of heap memory',1, MA_ERR)
97
98
99
100*     ****************************
101*     ****** cyclic mapping ******
102*     ****************************
103      p = 0
104      q = 1
105      do k=1,nz(nb)
106         int_mb(q_map(1,nb)+k-1) = q
107         int_mb(p_map(1,nb)+k-1) = p
108         if (p .eq. taskid) nq(nb) = q
109         p        = p+1
110         if (p .ge. np) then
111            p = 0
112            q = q + 1
113         end if
114      end do
115
116      do k=1,nz(nb)
117         if (int_mb(p_map(1,nb)+k-1) .eq. taskid) then
118            int_mb(k_map(1,nb)+int_mb(q_map(1,nb)+k-1)-1) = k
119         end if
120      end do
121
122      nfft3d(nb)     = nx(nb)*ny(nb)*nq(nb)
123      n2ft3d(nb)     = nfft3d(nb)
124      nfft3d_map(nb) = nfft3d(nb)
125      n2ft3d_map(nb) = n2ft3d(nb)
126
127
128*     ******************************
129*     ****** Hilbert mappings ******
130*     ******************************
131      else
132
133
134*     **** allocate q_map1,p_map1,q_map2,p_map2,q_map3,p_map3 ****
135      value =           BA_alloc_get(mt_int,ny(nb)*nz(nb),
136     >                              'q_map1',
137     >                               q_map1(2,nb),
138     >                               q_map1(1,nb))
139      value = value.and.BA_alloc_get(mt_int,ny(nb)*nz(nb),
140     >                              'p_map1',
141     >                               p_map1(2,nb),
142     >                               p_map1(1,nb))
143
144      value = value.and.BA_alloc_get(mt_int,nz(nb)*nx(nb),
145     >                              'q_map2',
146     >                               q_map2(2,nb),
147     >                               q_map2(1,nb))
148      value = value.and.BA_alloc_get(mt_int,nz(nb)*nx(nb),
149     >                              'p_map2',
150     >                               p_map2(2,nb),
151     >                               p_map2(1,nb))
152
153      value = value.and.BA_alloc_get(mt_int,ny(nb)*nx(nb),
154     >                              'q_map3',
155     >                               q_map3(2,nb),
156     >                               q_map3(1,nb))
157      value = value.and.BA_alloc_get(mt_int,ny(nb)*nx(nb),
158     >                              'p_map3',
159     >                               p_map3(2,nb),
160     >                               p_map3(1,nb))
161      if (.not. value)
162     > call errquit('Mapping_init:out of heap memory',1, MA_ERR)
163
164
165      !**** double grid map1 defined wrt to single grid         ****
166      !**** makes expand and contract routines trivial parallel ****
167      if (mapping2d.eq.1) then
168         if (nb.eq.1) then
169           call hilbert2d_map(ny(nb),nz(nb),int_mb(p_map1(1,nb)))
170         end if
171         call hilbert2d_map(nz(nb),nx(nb),int_mb(p_map2(1,nb)))
172         call hilbert2d_map(nx(nb),ny(nb),int_mb(p_map3(1,nb)))
173      else
174         if (nb.eq.1) then
175           call hcurve_map(ny(nb),nz(nb),int_mb(p_map1(1,nb)))
176         end if
177         call hcurve_map(nz(nb),nx(nb),int_mb(p_map2(1,nb)))
178         call hcurve_map(nx(nb),ny(nb),int_mb(p_map3(1,nb)))
179      end if
180
181
182
183      !**** double grid map1 defined wrt to single grid         ****
184      !**** makes expand and contract routines trivial parallel ****
185      if (nb.eq.1) then
186      call generate_map_indexes(taskid,np,
187     >                          ny(nb),nz(nb),
188     >                          int_mb(p_map1(1,nb)),
189     >                          int_mb(q_map1(1,nb)),nq1(nb))
190      else
191        nq1(2) = 4*nq1(1)
192        call expand_hilbert2d(np,ny(1),nz(1),
193     >                        int_mb(p_map1(1,1)),int_mb(q_map1(1,1)),
194     >                        int_mb(p_map1(1,2)),int_mb(q_map1(1,2)))
195      end if
196      call generate_map_indexes(taskid,np,
197     >                          nz(nb),nx(nb),
198     >                          int_mb(p_map2(1,nb)),
199     >                          int_mb(q_map2(1,nb)),nq2(nb))
200      call generate_map_indexes(taskid,np,
201     >                          nx(nb),ny(nb),
202     >                          int_mb(p_map3(1,nb)),
203     >                          int_mb(q_map3(1,nb)),nq3(nb))
204
205c      if (taskid.eq.0) then
206c      write(*,*) taskid,"nq2=",nq2(nb), ny(nb)*nq2(nb)
207c      write(*,*) taskid,"nq1=",nq1(nb), nx(nb)*nq1(nb)
208c      write(*,*) taskid,"nq3=",nq3(nb), nz(nb)*nq3(nb)
209c      write(*,*) 'hilbert map1 nb=',nb
210c      do j=0,nz(nb)-1
211c        write(*,'(A,80I4)') 'hilbert map:',
212c     >   (int_mb(p_map1(1,nb)+k+j*ny(nb)), k=0,ny(nb)-1)
213c      end do
214c      write(*,*)
215c      write(*,*) 'hilbert map2 nb=',nb
216c      do j=0,nx(nb)-1
217c        write(*,'(A,80I4)') 'hilbert map:',
218c     >   (int_mb(p_map2(1,nb)+k+j*nz(nb)), k=0,nz(nb)-1)
219c      end do
220c      write(*,*)
221c      write(*,*) 'hilbert map3 nb=',nb
222c      do j=0,ny(nb)-1
223c        write(*,'(A,80I4)') 'hilbert map:',
224c     >   (int_mb(p_map3(1,nb)+k+j*nx(nb)), k=0,nx(nb)-1)
225c      end do
226c      write(*,*)
227c      end if
228
229      nfft3d(nb) = nx(nb)*nq1(nb)
230      if ((ny(nb)*nq2(nb)).gt.nfft3d(nb)) nfft3d(nb) = ny(nb)*nq2(nb)
231      if ((nz(nb)*nq3(nb)).gt.nfft3d(nb)) nfft3d(nb) = nz(nb)*nq3(nb)
232      n2ft3d(nb) = nfft3d(nb)
233
234      nfft3d_map(nb) = nz(nb)*nq3(nb)
235      n2ft3d_map(nb) = nx(nb)*nq1(nb)
236
237      end if
238
239      return
240      end
241
242*     ***********************************
243*     *					*
244*     *	          C3dB_end   		*
245*     *					*
246*     ***********************************
247      subroutine C3dB_end(nb)
248      implicit none
249      integer nb
250
251#include "bafdecls.fh"
252#include "errquit.fh"
253#include "C3dB.fh"
254
255
256*     *** hilbert tranpose data structure ****
257      integer h_iq_to_i1(2,6,NBLOCKS)
258      integer h_iq_to_i2(2,6,NBLOCKS)
259      integer h_i1_start(2,6,NBLOCKS)
260      integer h_i2_start(2,6,NBLOCKS)
261      common / c_trans_blk_ijk / h_iq_to_i1,
262     >                           h_iq_to_i2,
263     >                           h_i1_start,
264     >                           h_i2_start
265
266      integer iq_to_i1(2,NBLOCKS)
267      integer iq_to_i2(2,NBLOCKS)
268      integer i1_start(2,NBLOCKS)
269      integer i2_start(2,NBLOCKS)
270      common / c_trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
271
272#ifndef MPI
273      integer Nchannels(NBLOCKS)
274      integer channel_proc(2,NBLOCKS)
275      integer channel_type(2,NBLOCKS)
276      common / c_channel_blk / channel_proc,channel_type,Nchannels
277#endif
278
279      logical value
280      integer i
281
282
283      call C3dB_fft_end(nb)
284      value = .true.
285
286      !**** slab mappings ****
287      if (mapping.eq.1) then
288      value = value.and.BA_free_heap(q_map(2,nb))
289      value = value.and.BA_free_heap(p_map(2,nb))
290      value = value.and.BA_free_heap(k_map(2,nb))
291      end if
292
293      !**** hilbert mappings ****
294      if (mapping.eq.2) then
295      value = value.and.BA_free_heap(q_map1(2,nb))
296      value = value.and.BA_free_heap(p_map1(2,nb))
297      value = value.and.BA_free_heap(q_map2(2,nb))
298      value = value.and.BA_free_heap(p_map2(2,nb))
299      value = value.and.BA_free_heap(q_map3(2,nb))
300      value = value.and.BA_free_heap(p_map3(2,nb))
301      end if
302
303
304      !**** slab transpose mappings ****
305      if (mapping.eq.1) then
306      value = value.and.BA_free_heap(i1_start(2,nb))
307      value = value.and.BA_free_heap(i2_start(2,nb))
308      value = value.and.BA_free_heap(iq_to_i1(2,nb))
309      value = value.and.BA_free_heap(iq_to_i2(2,nb))
310      end if
311
312      !**** hilbert transpose mappings ****
313      if (mapping.eq.2) then
314      do i=1,6
315      value = value.and.BA_free_heap(h_i1_start(2,i,nb))
316      value = value.and.BA_free_heap(h_i2_start(2,i,nb))
317      value = value.and.BA_free_heap(h_iq_to_i1(2,i,nb))
318      value = value.and.BA_free_heap(h_iq_to_i2(2,i,nb))
319      end do
320      end if
321
322
323
324#ifndef MPI
325      value = value.and.BA_free_heap(channel_proc(2,nb))
326      value = value.and.BA_free_heap(channel_type(2,nb))
327#endif
328
329      if (.not. value)
330     > call errquit('C3dB_end:freeing heap memory',0, MA_ERR)
331      return
332      end
333
334*     ***********************************
335*     *					*
336*     *	          C3dB_qtok   		*
337*     *					*
338*     ***********************************
339
340      subroutine C3dB_qtok(nb,q,k)
341      implicit none
342      integer nb
343      integer q,k
344
345#include "bafdecls.fh"
346#include "C3dB.fh"
347
348      k = int_mb(k_map(1,nb)+q-1)
349
350      return
351      end
352
353*     ***********************************
354*     *					*
355*     *	          C3dB_ktoqp  		*
356*     *					*
357*     ***********************************
358
359      subroutine C3dB_ktoqp(nb,k,q,p)
360      implicit none
361      integer nb
362      integer k,q,p
363
364#include "bafdecls.fh"
365#include "C3dB.fh"
366
367      q = int_mb(q_map(1,nb)+k-1)
368      p = int_mb(p_map(1,nb)+k-1)
369      return
370      end
371
372
373*     ***********************************
374*     *                                 *
375*     *           C3dB_ijktoindexp      *
376*     *                                 *
377*     ***********************************
378
379      subroutine C3dB_ijktoindexp(nb,i,j,k,indx,p)
380      implicit none
381      integer nb
382      integer i,j,k
383      integer indx,p
384
385#include "bafdecls.fh"
386#include "C3dB.fh"
387
388      integer q
389
390      !**** slab mapping ****
391      if (mapping.eq.1) then
392      q = int_mb(q_map(1,nb)+k-1)
393      p = int_mb(p_map(1,nb)+k-1)
394
395      indx = i + (j-1)*(nx(nb)) + (q-1)*(nx(nb))*ny(nb)
396
397      !**** hilbert mapping ****
398      else
399      q = int_mb(q_map3(1,nb)+(i-1)+(j-1)*nx(nb))
400      p = int_mb(p_map3(1,nb)+(i-1)+(j-1)*nx(nb))
401
402      indx = k + (q-1)*nz(nb)
403
404
405      end if
406
407      return
408      end
409
410
411
412*     ***********************************
413*     *                                 *
414*     *           C3dB_ijktoindex1p     *
415*     *                                 *
416*     ***********************************
417
418      subroutine C3dB_ijktoindex1p(nb,i,j,k,indx,p)
419      implicit none
420      integer nb
421      integer i,j,k
422      integer indx,p
423
424#include "bafdecls.fh"
425#include "C3dB.fh"
426
427      integer q
428
429      !**** slab mapping ***
430      if (mapping.eq.1) then
431      q = int_mb(q_map(1,nb)+j-1)
432      p = int_mb(p_map(1,nb)+j-1)
433
434      indx = i + (k-1)*nx(nb) + (q-1)*nx(nb)*nz(nb)
435
436      !**** hilbert mapping ****
437      else
438      q = int_mb(q_map2(1,nb)+(k-1)+(i-1)*(nz(nb)))
439      p = int_mb(p_map2(1,nb)+(k-1)+(i-1)*(nz(nb)))
440
441      indx = j + (q-1)*ny(nb)
442      end if
443
444      return
445      end
446
447
448
449
450*     ***********************************
451*     *                                 *
452*     *           C3dB_ijktoindex2p     *
453*     *                                 *
454*     ***********************************
455
456      subroutine C3dB_ijktoindex2p(nb,i,j,k,indx,p)
457      implicit none
458      integer nb
459      integer i,j,k
460      integer indx,p
461
462#include "bafdecls.fh"
463#include "C3dB.fh"
464
465
466      integer q
467
468      !**** slab mapping ****
469      if (mapping.eq.1) then
470      q = int_mb(q_map(1,nb)+j-1)
471      p = int_mb(p_map(1,nb)+j-1)
472
473      indx = i + (k-1)*(nx(nb)) + (q-1)*(nx(nb))*ny(nb)
474
475      !**** hilbert mapping ****
476      else
477      q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb))
478      p = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb))
479
480      indx = i + (q-1)*nx(nb)
481      end if
482
483
484      return
485      end
486
487
488
489
490
491*     ***********************************
492*     *                                 *
493*     *         C3dB_ijk_to_srqp        *
494*     *                                 *
495*     ***********************************
496
497      subroutine C3dB_ijk_to_srqp(nb,i,j,k, s,r,q,p)
498      implicit none
499      integer nb
500      integer i,j,k
501      integer s,r,q,p
502
503#include "bafdecls.fh"
504#include "C3dB.fh"
505
506
507c     q = q_map(k)
508c     p = p_map(k)
509
510      s = i
511      r = j
512      q = int_mb(q_map(1,nb)+k-1)
513      p = int_mb(p_map(1,nb)+k-1)
514      return
515      end
516
517
518
519
520*     ***********************************
521*     *					*
522*     *	        C3dB_nfft3d		*
523*     *					*
524*     ***********************************
525
526      subroutine C3dB_nfft3d(nb,nfft3d_out)
527      implicit none
528      integer nb
529      integer nfft3d_out
530
531#include "C3dB.fh"
532
533      nfft3d_out = nfft3d(nb)
534      return
535      end
536
537
538
539*     ***********************************
540*     *                                 *
541*     *         C3dB_nfft3d_map         *
542*     *                                 *
543*     ***********************************
544
545      subroutine C3dB_nfft3d_map(nb,nfft3d_out)
546      implicit none
547      integer nb
548      integer nfft3d_out
549
550#include "C3dB.fh"
551
552      nfft3d_out = nfft3d_map(nb)
553      return
554      end
555
556
557*     ***********************************
558*     *					*
559*     *	        C3dB_n2ft3d		*
560*     *					*
561*     ***********************************
562
563      subroutine C3dB_n2ft3d(nb,n2ft3d_out)
564      implicit none
565      integer nb
566      integer n2ft3d_out
567
568#include "C3dB.fh"
569
570      n2ft3d_out = n2ft3d(nb)
571      return
572      end
573
574
575*     ***********************************
576*     *                                 *
577*     *         C3dB_n2ft3d_map         *
578*     *                                 *
579*     ***********************************
580
581      subroutine C3dB_n2ft3d_map(nb,n2ft3d_out)
582      implicit none
583      integer nb
584      integer n2ft3d_out
585
586#include "C3dB.fh"
587
588      n2ft3d_out = n2ft3d_map(nb)
589      return
590      end
591
592
593*     ***********************************
594*     *                                 *
595*     *         C3dB_nqq                *
596*     *                                 *
597*     ***********************************
598
599      subroutine C3dB_nqq(nb,nqtmp)
600      implicit none
601      integer nb
602      integer nqtmp
603
604#include "C3dB.fh"
605
606      !**** slab mapping ****
607      if (mapping.eq.1) then
608         nqtmp = ny(nb)*nq(nb)
609      !**** hilbert mapping ****
610      else
611         nqtmp = nq(nb)
612      end if
613
614      return
615      end
616
617
618
619*     ***********************************
620*     *					*
621*     *	        C3dB_nq			*
622*     *					*
623*     ***********************************
624
625      subroutine C3dB_nq(nb,nqtmp)
626      implicit none
627      integer nb
628      integer nqtmp
629
630#include "C3dB.fh"
631
632      nqtmp = nq(nb)
633
634      return
635      end
636
637*     ***********************************
638*     *					*
639*     *	        C3dB_nx			*
640*     *					*
641*     ***********************************
642
643      subroutine C3dB_nx(nb,nxtmp)
644      implicit none
645      integer nb
646      integer nxtmp
647
648#include "C3dB.fh"
649
650      nxtmp = nx(nb)
651      return
652      end
653
654*     ***********************************
655*     *					*
656*     *	        C3dB_ny			*
657*     *					*
658*     ***********************************
659
660      subroutine C3dB_ny(nb,nytmp)
661      implicit none
662      integer nb
663      integer nytmp
664
665#include "C3dB.fh"
666
667      nytmp = ny(nb)
668      return
669      end
670
671*     ***********************************
672*     *					*
673*     *	        C3dB_nz			*
674*     *					*
675*     ***********************************
676
677      subroutine C3dB_nz(nb,nztmp)
678      implicit none
679      integer nb
680      integer nztmp
681
682#include "C3dB.fh"
683
684      nztmp = nz(nb)
685      return
686      end
687
688
689*     ***********************************
690*     *					*
691*     *	        C3dB_Init		*
692*     *					*
693*     ***********************************
694
695      subroutine C3dB_Init(nb,nx_in,ny_in,nz_in,map_in)
696      implicit none
697      integer nb
698      integer nx_in,ny_in,nz_in
699      integer map_in
700
701#include "C3dB.fh"
702
703      !**** local variables ****
704      integer MASTER
705      parameter (MASTER=0)
706      integer taskid,np
707
708      call Parallel3d_np_i(np)
709      call Parallel_taskid(taskid)
710
711
712      !**** Make sure ngrid is consistent with mapping ***
713      if (map_in.eq.1) then
714        if ((np.gt.nz_in).or.(ny_in.ne.nz_in)) then
715          if (taskid.eq.MASTER) then
716            write(6,*) 'Error: for slab decomposition the',
717     >                 ' number of processors must ',
718     >                 ' be in the range ( 1 ...ngrid(3)=',
719     >                   nz_in,')'
720           write(6,*) ' and ngrid(2) == ngrid(3), ',
721     >                ' ngrid(2)=',ny_in,
722     >                ' ngrid(3)=',nz_in
723          end if
724          call errquit('C3dB_Init: mapping error',0,0)
725        end if
726        if (mod(nx_in,2).ne.0) then
727          if (taskid.eq.MASTER) then
728           write(6,*)
729     >      'Error: ngrid(1) must be even (ngrid(1) mod 2 == 0)'
730           write(6,*) 'Error: ngrid(1)=',nx_in
731          end if
732          call errquit('C3dB_Init: slab mapping error',0,0)
733        end if
734      end if
735
736
737      if (map_in.ge.2) then
738        if (np.gt.(ny_in*nz_in)) then
739          if (taskid.eq.MASTER) then
740           write(6,*) 'Error: np > MIN(ngrid(2)*ngrid(3),',
741     >                ' ngrid(1)*ngrid(2),',
742     >                ' ngrid(1)*ngrid(3))'
743           write(6,*) 'Error: np > ngrid(2)*ngrid(3)'
744           write(6,*) 'Error: for the Hilbert decomposition the',
745     >                 ' the number of processors must ',
746     >                 ' be in the range ( 1 ...',
747     >                   ny_in*nz_in,')'
748          end if
749          call errquit('C3dB_Init: Hilbert mapping error',0,0)
750        end if
751        if (np.gt.(nx_in*ny_in)) then
752          if (taskid.eq.MASTER) then
753           write(6,*) 'Error: np > MIN(ngrid(2)*ngrid(3),',
754     >                ' ngrid(1)*ngrid(2),',
755     >                ' ngrid(1)*ngrid(3))'
756           write(6,*) 'Error: np > ngrid(1)*ngrid(2)'
757           write(6,*) 'Error: for the Hilbert decomposition the',
758     >                 ' the number of processors must ',
759     >                 ' be in the range ( 1 ...',
760     >                   nx_in*ny_in,')'
761          end if
762          call errquit('C3dB_Init: Hilbert mapping error',0,0)
763        end if
764        if (np.gt.(nx_in*nz_in)) then
765          if (taskid.eq.MASTER) then
766           write(6,*) 'Error: np > MIN(ngrid(2)*ngrid(3),',
767     >                ' ngrid(1)*ngrid(2),',
768     >                ' ngrid(1)*ngrid(3))'
769           write(6,*) 'Error: np > ngrid(1)*ngrid(3)'
770           write(6,*) 'Error: for the Hilbert decomposition the',
771     >                 ' the number of processors must ',
772     >                 ' be in the range ( 1 ...',
773     >                   nx_in*nz_in,')'
774          end if
775          call errquit('C3dB_Init: Hilbert mapping error',0,0)
776        end if
777        if (mod(nx_in,2).ne.0) then
778          if (taskid.eq.MASTER) then
779           write(6,*)
780     >      'Error: ngrid(1) must be even (ngrid(1) mod 2 == 0)'
781           write(6,*) 'Error: ngrid(1)=',nx_in
782          end if
783          call errquit('C3dB_Init: Hilbert mapping error',0,0)
784        end if
785      end if
786
787
788*     ***** initialize C3dB common block *****
789      nx(nb)     = nx_in
790      ny(nb)     = ny_in
791      nz(nb)     = nz_in
792      mapping    = map_in
793      mapping2d  = 1
794      if (mapping.eq.3) then
795         mapping   = 2
796         mapping2d = 2
797      end if
798
799
800*     **** do other initializations ****
801      call Mapping_Init_C3dB(nb)
802      if (mapping.eq.1) call C3dB_c_transpose_jk_init(nb)
803      if (mapping.eq.2) call C3dB_c_transpose_ijk_init(nb)
804
805#ifndef MPI
806      call C3dB_channel_init(nb)
807#endif
808
809      call C3dB_fft_init(nb)
810
811      return
812      end
813
814
815*     ***********************************
816*     *					*
817*     *	        C3dB_(c,r)_Zero  	*
818*     *					*
819*     ***********************************
820
821      subroutine C3dB_c_Zero(nb,A)
822      implicit none
823      integer nb
824      complex*16 A(*)
825
826#include "C3dB.fh"
827
828      call dcopy(2*nfft3d_map(nb),0.0d0,0,A,1)
829      return
830      end
831
832
833      subroutine C3dB_r_Zero(nb,A)
834      implicit none
835      integer nb
836      real*8  A(*)
837
838
839#include "C3dB.fh"
840
841      call dcopy(n2ft3d_map(nb),0.0d0,0,A,1)
842      return
843      end
844
845
846
847*     ***********************************
848*     *					*
849*     *	        C3dB_(c,r)_Copy	*
850*     *					*
851*     ***********************************
852
853      subroutine C3dB_c_Copy(nb,A,B)
854      implicit none
855      integer nb
856      complex*16 A(*)
857      complex*16 B(*)
858
859#include "C3dB.fh"
860
861      call dcopy(2*nfft3d_map(nb),A,1,B,1)
862      return
863      end
864
865      subroutine C3dB_r_Copy(nb,A,B)
866      implicit none
867      integer nb
868      real*8 A(*)
869      real*8 B(*)
870
871#include "C3dB.fh"
872
873      call dcopy(n2ft3d_map(nb),A,1,B,1)
874      return
875      end
876
877      subroutine C3dB_t_Copy(nb,A,B)
878      implicit none
879      integer nb
880      real*8 A(*)
881      real*8 B(*)
882
883#include "C3dB.fh"
884
885      call dcopy(nfft3d_map(nb),A,1,B,1)
886      return
887      end
888
889*     ***********************************
890*     *					*
891*     *	        C3dB_ct_Copy      	*
892*     *					*
893*     ***********************************
894
895      subroutine C3dB_ct_Copy(nb,A,B)
896      implicit none
897      integer nb
898      complex*16 A(*)
899      real*8     B(*)
900
901#include "C3dB.fh"
902
903      integer i
904
905      do i=1,nfft3d_map(nb)
906         B(i) = dble(A(i))
907      end do
908      return
909      end
910
911*     ***********************************
912*     *                                 *
913*     *         C3dB_tc_Copy            *
914*     *                                 *
915*     ***********************************
916
917      subroutine C3dB_tc_Copy(nb,A,B)
918      implicit none
919      integer nb
920      real*8     A(*)
921      complex*16 B(*)
922
923#include "C3dB.fh"
924
925      integer i
926
927!$OMP DO
928      do i=1,nfft3d_map(nb)
929         B(i) = dcmplx(A(i),0.0d0)
930      end do
931!$OMP END DO
932      return
933      end
934
935
936
937
938*     ***********************************
939*     *                                 *
940*     *         C3dB_fft_init           *
941*     *                                 *
942*     ***********************************
943
944      subroutine C3dB_fft_init(nb)
945      implicit none
946      integer nb
947
948#include "bafdecls.fh"
949#include "errquit.fh"
950
951#include "C3dB.fh"
952
953
954      integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS)
955      common    / C3dB_fft / tmpx,tmpy,tmpz
956
957      logical value
958
959
960      value = BA_alloc_get(mt_dcpl,(nfft3d(nb)),
961     >        'fttmpx',tmpx(2,nb),tmpx(1,nb))
962      value = value.and.
963     >        BA_alloc_get(mt_dcpl,(nfft3d(nb)),
964     >        'fttmpy',tmpy(2,nb),tmpy(1,nb))
965      value = value.and.
966     >        BA_alloc_get(mt_dcpl,(nfft3d(nb)),
967     >        'fttmpz',tmpz(2,nb),tmpz(1,nb))
968      if (.not. value)
969     >   call errquit('C3dB_fft_init:out of heap memory',0, MA_ERR)
970
971
972#ifdef MLIB
973      call z1dfft(dcpl_mb(tmpx(1,nb)),nx(nb),
974     >            dcpl_mb(tmpx(1,nb)),-3,ierr)
975      call z1dfft(dcpl_mb(tmpx(1,nb)),ny(nb),
976     >            dcpl_mb(tmpy(1,nb)),-3,ierr)
977      call z1dfft(dcpl_mb(tmpx(1,nb)),nz(nb),
978     >            dcpl_mb(tmpz(1,nb)),-3,ierr)
979
980#else
981      call dcffti(nx(nb),dcpl_mb(tmpx(1,nb)))
982      call dcffti(ny(nb),dcpl_mb(tmpy(1,nb)))
983      call dcffti(nz(nb),dcpl_mb(tmpz(1,nb)))
984#endif
985
986      return
987      end
988
989
990*     ***********************************
991*     *                                 *
992*     *         C3dB_fft_end            *
993*     *                                 *
994*     ***********************************
995
996      subroutine C3dB_fft_end(nb)
997      implicit none
998      integer nb
999
1000#include "bafdecls.fh"
1001#include "errquit.fh"
1002
1003#include "C3dB.fh"
1004
1005
1006      integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS)
1007      common    / C3dB_fft / tmpx,tmpy,tmpz
1008
1009      logical value
1010
1011      value =           BA_free_heap(tmpx(2,nb))
1012      value = value.and.BA_free_heap(tmpy(2,nb))
1013      value = value.and.BA_free_heap(tmpz(2,nb))
1014      if (.not.value)
1015     >   call errquit(
1016     >   'C3dB_fft_end:error deallocatingof heap memory',0, MA_ERR)
1017
1018      return
1019      end
1020
1021
1022
1023
1024
1025*     ***********************************
1026*     *					*
1027*     *	        C3dB_cr_fft3b		*
1028*     *					*
1029*     ***********************************
1030
1031      subroutine C3dB_cr_fft3b(nb,A)
1032
1033*****************************************************
1034*                                                   *
1035*      This routine performs the operation of       *
1036*      a three dimensional complex to complex       *
1037*      inverse fft                                  *
1038*           A(nx,ny(nb),nz(nb)) <- FFT3^(-1)[A(kx,ky,kz)]   *
1039*                                                   *
1040*      Entry - 					    *
1041*              A: a column distribuded 3d block     *
1042*              tmp: tempory work space must be at   *
1043*                    least the size of (complex)    *
1044*                    (nfft*nfft + 1) + 10*nfft      *
1045*                                                   *
1046*       Exit - A is transformed and the imaginary   *
1047*              part of A is set to zero             *
1048*       uses - C3dB_c_transpose_jk, dcopy           *
1049*                                                   *
1050*****************************************************
1051
1052      implicit none
1053      integer nb
1054      complex*16  A(*)
1055
1056#include "bafdecls.fh"
1057#include "C3dB.fh"
1058#include "errquit.fh"
1059
1060
1061      integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS)
1062      common    / C3dB_fft / tmpx,tmpy,tmpz
1063
1064
1065*     *** local variables ***
1066      integer i,j,q,indx
1067
1068c     complex*16  tmp1(*)
1069c     complex*16  tmp2(*)
1070c     real*8      tmp3(*)
1071      !integer nfft3d
1072      integer tmp1(2),tmp2(2),ierr
1073      logical value
1074
1075
1076
1077      call nwpw_timing_start(1)
1078
1079*     ***** allocate temporary space ****
1080      !call C3dB_nfft3d(nb,nfft3d)
1081      value = BA_push_get(mt_dcpl,(nfft3d(nb)),
1082     >                    'ffttmp1',tmp1(2),tmp1(1))
1083      value = value.and.
1084     >        BA_push_get(mt_dcpl,(nfft3d(nb)),
1085     >                   'ffttmp2',tmp2(2),tmp2(1))
1086      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
1087
1088
1089
1090
1091      !**********************
1092      !**** slab mapping ****
1093      !**********************
1094      if (mapping.eq.1) then
1095
1096*     ********************************************
1097*     ***         Do a transpose of A          ***
1098*     ***      A(kx,kz,ky) <- A(kx,ky,kz)      ***
1099*     ********************************************
1100c     call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dbl_mb(tmp3(1)))
1101
1102*     *************************************************
1103*     ***     do fft along kz dimension             ***
1104*     ***   A(kx,nz(nb),ky) <- fft1d^(-1)[A(kx,kz,ky)]  ***
1105*     *************************************************
1106#ifdef MLIB
1107      !call dcffti(nz(nb),dcpl_mb(tmp1(1)))
1108      do q=1,nq(nb)
1109      do i=1,nx(nb)
1110         indx = i + (q-1)*nx(nb)*nz(nb)
1111         call zcopy(nz(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1)
1112         call z1dfft(dcpl_mb(tmp2(1)),nz(nb),
1113     >               dcpl_mb(tmpz(1,nb)),-2,ierr)
1114         call zcopy(nz(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb))
1115      end do
1116      end do
1117#else
1118      !call dcffti(nz(nb),dcpl_mb(tmp1(1)))
1119      do q=1,nq(nb)
1120      do i=1,nx(nb)
1121         indx = i + (q-1)*nx(nb)*nz(nb)
1122         call zcopy(nz(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1)
1123         call dcfftb(nz(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpz(1,nb)))
1124         call zcopy(nz(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb))
1125      end do
1126      end do
1127#endif
1128
1129*     ********************************************
1130*     ***         Do a transpose of A          ***
1131*     ***      A(kx,ky,nz(nb)) <- A(kx,nz(nb),ky)      ***
1132*     ********************************************
1133      call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dcpl_mb(tmp1(1)))
1134
1135*     *************************************************
1136*     ***     do fft along ky dimension             ***
1137*     ***   A(kx,ny(nb),nz(nb)) <- fft1d^(-1)[A(kx,ky,nz(nb))]  ***
1138*     *************************************************
1139#ifdef MLIB
1140      do q=1,nq(nb)
1141      do i=1,nx(nb)
1142         indx = i + (q-1)*nx(nb)*ny(nb)
1143         call zcopy(ny(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1)
1144         call z1dfft(dcpl_mb(tmp2(1)),ny(nb),
1145     >               dcpl_mb(tmpy(1,nb)),-2,ierr)
1146         call zcopy(ny(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb))
1147      end do
1148      end do
1149#else
1150      !call dcffti(ny(nb),dcpl_mb(tmp1(1)))
1151      do q=1,nq(nb)
1152      do i=1,nx(nb)
1153         indx = i + (q-1)*nx(nb)*ny(nb)
1154         call zcopy(ny(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1)
1155         call dcfftb(ny(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpy(1,nb)))
1156         call zcopy(ny(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb))
1157      end do
1158      end do
1159#endif
1160
1161*     *************************************************
1162*     ***     do fft along kx dimension             ***
1163*     ***   A(nx(nb),ny(nb),nz(nb)) <- fft1d^(-1)[A(kx,ny(nb),nz(nb))]  ***
1164*     *************************************************
1165#ifdef MLIB
1166      indx = 1
1167      do q=1,nq(nb)
1168      do j=1,ny(nb)
1169         call z1dfft(A(indx),nx(nb),
1170     >               dcpl_mb(tmpx(1,nb)),-2,ierr)
1171         indx = indx + nx(nb)
1172      end do
1173      end do
1174#else
1175      !call dcffti(nx(nb),dcpl_mb(tmp1(1)))
1176      do q=1,nq(nb)
1177      do j=1,ny(nb)
1178         indx = 1 + (j-1)*nx(nb) + (q-1)*nx(nb)*ny(nb)
1179         call zcopy(nx(nb),A(indx),1,dcpl_mb(tmp2(1)),1)
1180         call dcfftb(nx(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpx(1,nb)))
1181         call zcopy(nx(nb),dcpl_mb(tmp2(1)),1,A(indx),1)
1182      end do
1183      end do
1184#endif
1185      !*************************
1186      !**** hilbert mapping ****
1187      !*************************
1188      else
1189
1190*     *************************************************
1191*     ***     do fft along kz dimension             ***
1192*     ***   A(nz(nb),kx,ky) <- fft1d^(-1)[A(kz,kx,ky)]  ***
1193*     *************************************************
1194#ifdef MLIB
1195      indx = 1
1196      do q=1,nq3(nb)
1197         !indx = 1 + (q-1)*nz(nb)
1198         call z1dfft(A(indx),nz(nb),dcpl_mb(tmpz(1,nb)),-2,ierr)
1199         indx = indx + nz(nb)
1200      end do
1201#else
1202      indx = 1
1203      do q=1,nq3(nb)
1204         !indx = 1 + (q-1)*nz(nb)
1205         call dcfftb(nz(nb),A(indx),dcpl_mb(tmpz(1,nb)))
1206         indx = indx + nz(nb)
1207      end do
1208#endif
1209
1210      call C3dB_c_transpose_ijk(nb,3,A,dcpl_mb(tmp1(1)),
1211     >                                 dcpl_mb(tmp2(1)))
1212
1213
1214*     *************************************************
1215*     ***     do fft along ky dimension             ***
1216*     ***   A(ny(nb),nz(nb),kx) <- fft1d^(-1)[A(ky,nz(nb),kx)]  ***
1217*     *************************************************
1218#ifdef MLIB
1219      indx = 1
1220      do q=1,nq2(nb)
1221         !indx = 1 + (q-1)*ny(nb)
1222         call z1dfft(A(indx),ny(nb),dcpl_mb(tmpy(1,nb)),-2,ierr)
1223         indx = indx + ny(nb)
1224      end do
1225#else
1226      indx = 1
1227      do q=1,nq2(nb)
1228         !indx = 1 + (q-1)*ny(nb)
1229         call dcfftb(ny(nb),A(indx),dcpl_mb(tmpy(1,nb)))
1230         indx = indx + ny(nb)
1231      end do
1232#endif
1233
1234      call C3dB_c_transpose_ijk(nb,4,A,dcpl_mb(tmp1(1)),
1235     >                                 dcpl_mb(tmp2(1)))
1236
1237*     *************************************************
1238*     ***     do fft along kx dimension             ***
1239*     ***   A(nx(nb),ny(nb),nz(nb)) <- fft1d^(-1)[A(kx,ny(nb),nz(nb))]  ***
1240*     *************************************************
1241#ifdef MLIB
1242      indx = 1
1243      do q=1,nq1(nb)
1244         !indx = 1 + (q-1)*nx(nb)
1245         call z1dfft(A(indx),nx(nb),dcpl_mb(tmpx(1,nb)),-2,ierr)
1246         indx = indx + nx(nb)
1247      end do
1248#else
1249      indx = 1
1250      do q=1,nq1(nb)
1251         !indx = 1 + (q-1)*ny(nb)
1252         call dcfftb(nx(nb),A(indx),dcpl_mb(tmpx(1,nb)))
1253         indx = indx + nx(nb)
1254      end do
1255#endif
1256
1257
1258
1259      end if
1260
1261*     **** deallocate temporary space  ****
1262      value = BA_pop_stack(tmp2(2))
1263      value = BA_pop_stack(tmp1(2))
1264
1265      call nwpw_timing_end(1)
1266      return
1267      end
1268
1269
1270
1271
1272
1273*     ***********************************
1274*     *					*
1275*     *	        C3dB_rc_fft3f		*
1276*     *					*
1277*     ***********************************
1278
1279      subroutine C3dB_rc_fft3f(nb,A)
1280
1281*****************************************************
1282*                                                   *
1283*      This routine performs the operation of       *
1284*      a three dimensional complex to complex fft   *
1285*           A(kx,ky,kz) <- FFT3[A(nx(nb),ny(nb),nz(nb))]        *
1286*                                                   *
1287*      Entry - 					    *
1288*              A: a column distribuded 3d block     *
1289*              tmp: tempory work space must be at   *
1290*                    least the size of (complex)    *
1291*                    (nfft*nfft + 1) + 10*nfft      *
1292*                                                   *
1293*       Exit - A is transformed                     *
1294*                                                   *
1295*       uses - transpose1 subroutine                *
1296*                                                   *
1297*****************************************************
1298
1299      implicit none
1300      integer nb
1301      complex*16  A(*)
1302
1303#include "bafdecls.fh"
1304#include "C3dB.fh"
1305#include "errquit.fh"
1306
1307
1308      integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS)
1309      common    / C3dB_fft / tmpx,tmpy,tmpz
1310
1311
1312*     *** local variables ***
1313      integer i,j,q,indx
1314
1315      !integer nfft3d
1316      integer tmp1(2),tmp2(2)
1317      logical value
1318
1319
1320      call nwpw_timing_start(1)
1321
1322*     ***** allocate temporary space ****
1323      !call C3dB_nfft3d(nb,nfft3d)
1324      value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',tmp1(2),tmp1(1))
1325      value = value.and.
1326     >        BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1))
1327      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
1328
1329
1330
1331      !**********************
1332      !**** slab mapping ****
1333      !**********************
1334      if (mapping.eq.1) then
1335
1336*     ********************************************
1337*     ***     do fft along nx(nb) dimension        ***
1338*     ***   A(kx,ny(nb),nz(nb)) <- fft1d[A(nx(nb),ny(nb),nz(nb))]  ***
1339*     ********************************************
1340      !call dcffti(nx(nb),dcpl_mb(tmp1(1)))
1341      do q=1,nq(nb)
1342      do j=1,ny(nb)
1343         indx = 1 + (j-1)*nx(nb) + (q-1)*nx(nb)*ny(nb)
1344         call zcopy((nx(nb)),A(indx),1,dcpl_mb(tmp2(1)),1)
1345         call dcfftf(nx(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpx(1,nb)))
1346         call zcopy(nx(nb),dcpl_mb(tmp2(1)),1,A(indx),1)
1347      end do
1348      end do
1349
1350*     ********************************************
1351*     ***     do fft along ny(nb) dimension        ***
1352*     ***   A(kx,ky,nz(nb)) <- fft1d[A(kx,ny(nb),nz(nb))]  ***
1353*     ********************************************
1354      !call dcffti(ny(nb),dcpl_mb(tmp1(1)))
1355      do q=1,nq(nb)
1356      do i=1,nx(nb)
1357         indx = i + (q-1)*nx(nb)*ny(nb)
1358         call zcopy(ny(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1)
1359         call dcfftf(ny(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpy(1,nb)))
1360         call zcopy(ny(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb))
1361      end do
1362      end do
1363
1364
1365*     ********************************************
1366*     ***         Do a transpose of A          ***
1367*     ***      A(ky,nz(nb),ky) <- A(kx,ky,nz(nb))      ***
1368*     ********************************************
1369      call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dcpl_mb(tmp1(1)))
1370
1371
1372*     ********************************************
1373*     ***     do fft along nz(nb) dimension        ***
1374*     ***   A(kx,kz,ky) <- fft1d[A(kx,nz(nb),ky)]  ***
1375*     ********************************************
1376      !call dcffti(nz(nb),dcpl_mb(tmp1(1)))
1377      do q=1,nq(nb)
1378      do i=1,nx(nb)
1379         indx = i + (q-1)*nx(nb)*ny(nb)
1380         call zcopy(nz(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1)
1381         call dcfftf(nz(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpz(1,nb)))
1382         call zcopy(nz(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb))
1383      end do
1384      end do
1385
1386*     ********************************************
1387*     ***         Do a transpose of A          ***
1388*     ***      A(kx,ky,kz) <- A(kx,kz,ky)      ***
1389*     ********************************************
1390c     call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dcpl_mb(tmp1(1)))
1391
1392
1393
1394      !*************************
1395      !**** hilbert mapping ****
1396      !*************************
1397      else
1398
1399*     ********************************************
1400*     ***     do fft along nx(nb) dimension        ***
1401*     ***   A(kx,ny(nb),nz(nb)) <- fft1d[A(nx(nb),ny(nb),nz(nb))]  ***
1402*     ********************************************
1403#ifdef MLIB
1404      indx = 1
1405      do q=1,nq1(nb)
1406         !indx = 1 + (q-1)*nx(nb)
1407         call z1dfft(A(indx),nx(nb),dcpl_mb(tmpx(1,nb)),1,ierr)
1408         indx = indx + nx(nb)
1409      end do
1410#else
1411      indx = 1
1412      do q=1,nq1(nb)
1413         !indx = 1 + (q-1)*nx(nb)
1414         call dcfftf(nx(nb),A(indx),dcpl_mb(tmpx(1,nb)))
1415         indx = indx + nx(nb)
1416      end do
1417#endif
1418
1419      call C3dB_c_transpose_ijk(nb,1,A,dcpl_mb(tmp1(1)),
1420     >                                 dcpl_mb(tmp2(1)))
1421
1422*     ********************************************
1423*     ***     do fft along ny(nb) dimension        ***
1424*     ***   A(ky,nz(nb),kx) <- fft1d[A(ny(nb),nz(nb),kx)]  ***
1425*     ********************************************
1426#ifdef MLIB
1427      indx = 1
1428      do q=1,nq2(nb)
1429         !indx = 1 + (q-1)*ny(nb)
1430         call z1dfft(A(indx),ny(nb),dcpl_mb(tmpy(1,nb)),1,ierr)
1431         indx = indx + ny(nb)
1432      end do
1433#else
1434      indx = 1
1435      do q=1,nq2(nb)
1436         !indx = 1 + (q-1)*ny(nb)
1437         call dcfftf(ny(nb),A(indx),dcpl_mb(tmpy(1,nb)))
1438         indx = indx + ny(nb)
1439      end do
1440#endif
1441
1442      call C3dB_c_transpose_ijk(nb,2,A,dcpl_mb(tmp1(1)),
1443     >                                 dcpl_mb(tmp2(1)))
1444
1445*     ********************************************
1446*     ***     do fft along nz(nb) dimension        ***
1447*     ***   A(kz,kx,ky) <- fft1d[A(nz(nb),kx,ky)]  ***
1448*     ********************************************
1449#ifdef MLIB
1450      indx = 1
1451      do q=1,nq3(nb)
1452         !indx = 1 + (q-1)*nz(nb)
1453         call z1dfft(A(indx),nz(nb),dcpl_mb(tmpz(1,nb)),1,ierr)
1454         indx = indx + nz(nb)
1455      end do
1456#else
1457      indx = 1
1458      do q=1,nq3(nb)
1459         !indx = 1 + (q-1)*nz(nb)
1460         call dcfftf(nz(nb),A(indx),dcpl_mb(tmpz(1,nb)))
1461         indx = indx + nz(nb)
1462      end do
1463#endif
1464
1465
1466
1467      end if
1468
1469*     **** deallocate temporary space  ****
1470      value = BA_pop_stack(tmp2(2))
1471      value = BA_pop_stack(tmp1(2))
1472
1473      call nwpw_timing_end(1)
1474      return
1475      end
1476
1477
1478
1479
1480
1481*     ***********************************
1482*     *					*
1483*     *	       C3dB_(c,r)_SMul 	        *
1484*     *					*
1485*     ***********************************
1486
1487*  This routine performs the operation	C = scale * A
1488* where scale is a real*8 number.
1489
1490      subroutine C3dB_c_SMul(nb,scale,A,C)
1491      implicit none
1492      integer    nb
1493      real*8     scale
1494      complex*16 A(*)
1495      complex*16 C(*)
1496
1497#include "C3dB.fh"
1498
1499      integer i
1500
1501!$OMP DO
1502      do i=1,nfft3d_map(nb)
1503         C(i) = scale*A(i)
1504      end do
1505!$OMP END DO
1506      return
1507      end
1508
1509
1510      subroutine C3dB_c_SMul1(nb,scale,A)
1511      implicit none
1512      integer    nb
1513      real*8     scale
1514      complex*16 A(*)
1515
1516#include "C3dB.fh"
1517
1518      integer i
1519
1520!$OMP DO
1521      do i=1,nfft3d_map(nb)
1522         A(i) = scale*A(i)
1523      end do
1524!$OMP END DO
1525      return
1526      end
1527
1528
1529      subroutine C3dB_b_SMul1(nb,scale,A)
1530      implicit none
1531      integer    nb
1532      real*8     scale
1533      complex*16 A(*)
1534#include "C3dB.fh"
1535      integer i
1536
1537!$OMP DO
1538      do i=1,n2ft3d_map(nb)
1539         A(i) = scale*A(i)
1540      end do
1541!$OMP END DO
1542      return
1543      end
1544
1545
1546
1547      subroutine C3dB_r_SMul(nb,scale,A,C)
1548      implicit none
1549      integer nb
1550      real*8     scale
1551      real*8 A(*)
1552      real*8 C(*)
1553
1554#include "C3dB.fh"
1555
1556      integer i
1557
1558!$OMP DO
1559      do i=1,n2ft3d_map(nb)
1560         C(i) = scale*A(i)
1561      end do
1562!$OMP END DO
1563      return
1564      end
1565
1566      subroutine C3dB_r_SMul1(nb,scale,A)
1567      implicit none
1568      integer nb
1569      real*8     scale
1570      real*8 A(*)
1571
1572#include "C3dB.fh"
1573
1574      integer i
1575
1576!$OMP DO
1577      do i=1,n2ft3d_map(nb)
1578         A(i) = scale*A(i)
1579      end do
1580!$OMP END DO
1581      return
1582      end
1583
1584
1585      subroutine C3dB_c_ZMul(nb,scale,A,C)
1586      implicit none
1587      integer    nb
1588      complex*16 scale
1589      complex*16 A(*)
1590      complex*16 C(*)
1591
1592#include "C3dB.fh"
1593
1594      integer i
1595
1596!$OMP DO
1597      do i=1,nfft3d_map(nb)
1598         C(i) = scale*A(i)
1599      end do
1600!$OMP END DO
1601      return
1602      end
1603
1604
1605*     ***********************************
1606*     *                                 *
1607*     *        C3dB_rc_SMul             *
1608*     *                                 *
1609*     ***********************************
1610
1611*  This routine performs the operation  C = scale * A
1612* where scale and A are real*8 numbers.
1613
1614      subroutine C3dB_rc_SMul(nb,scale,A,C)
1615      implicit none
1616      integer    nb
1617      real*8     scale
1618      real*8 A(*)
1619      complex*16 C(*)
1620
1621#include "C3dB.fh"
1622
1623      integer i
1624
1625!$OMP DO
1626      do i=1,n2ft3d_map(nb)
1627         C(i) = dcmplx(scale*A(i),0.0d0)
1628      end do
1629!$OMP END DO
1630      return
1631      end
1632
1633*     ***********************************
1634*     *					*
1635*     *	       C3dB_cr_aSqrpy	 	*
1636*     *					*
1637*     ***********************************
1638
1639*  This routine performs the operation	C = C + w*A * A
1640
1641      subroutine C3dB_cr_aSqrpy(nb,w,A,C)
1642      implicit none
1643      integer    nb
1644      real*8     w
1645      complex*16 A(*)
1646      real*8     C(*),ar,ai
1647
1648#include "C3dB.fh"
1649
1650      integer i
1651
1652!$OMP DO
1653      do i=1,n2ft3d_map(nb)
1654         ar=dble(A(i))
1655         ai=dimag(A(i))
1656         C(i) = C(i) + w*(ar*ar+ai*ai)
1657      end do
1658!$OMP END DO
1659      return
1660      end
1661
1662
1663
1664
1665*     ***********************************
1666*     *					*
1667*     *	       C3dB_cr_Sqr	 	*
1668*     *					*
1669*     ***********************************
1670
1671*  This routine performs the operation	C = A * A
1672
1673      subroutine C3dB_cr_Sqr(nb,A,C)
1674      implicit none
1675      integer    nb
1676      complex*16 A(*)
1677      real*8     C(*),ar,ai
1678
1679#include "C3dB.fh"
1680
1681      integer i
1682
1683!$OMP DO
1684      do i=1,n2ft3d_map(nb)
1685         ar=dble(A(i))
1686         ai=dimag(A(i))
1687         C(i) = ar*ar + ai*ai
1688      end do
1689!$OMP END DO
1690      return
1691      end
1692
1693
1694*     ***********************************
1695*     *                                 *
1696*     *        C3dB_cc_absSqrt1         *
1697*     *                                 *
1698*     ***********************************
1699
1700*  This routine performs the operation  A = sqrt(abs(A))
1701
1702      subroutine C3dB_cc_absSqrt1(nb,A)
1703      implicit none
1704      integer    nb
1705      complex*16 A(*)
1706      real*8     ar,ai
1707
1708#include "C3dB.fh"
1709
1710      integer i
1711
1712!$OMP DO
1713      do i=1,n2ft3d_map(nb)
1714         ar=dble(A(i))
1715         !ai=dimag(A(i))
1716         !A(i) = dcmplx(dsqrt(dsqrt(ar*ar + ai*ai)),0.0d0)
1717         A(i) = dcmplx(dsqrt(dabs(ar)),0.0d0)
1718      end do
1719!$OMP END DO
1720      return
1721      end
1722
1723
1724
1725*     ***********************************
1726*     *                                 *
1727*     *        C3dB_cr_real             *
1728*     *                                 *
1729*     ***********************************
1730
1731*  This routine performs the operation  C = real(A)
1732
1733      subroutine C3dB_cr_real(nb,A,C)
1734      implicit none
1735      integer    nb
1736      complex*16 A(*)
1737      real*8     C(*)
1738
1739#include "C3dB.fh"
1740
1741      integer i
1742
1743!$OMP DO
1744      do i=1,n2ft3d_map(nb)
1745         C(i) = dble(A(i))
1746      end do
1747!$OMP END DO
1748      return
1749      end
1750
1751
1752
1753*     ***********************************
1754*     *                                 *
1755*     *        C3dB_cr_imag             *
1756*     *                                 *
1757*     ***********************************
1758
1759*  This routine performs the operation  C = imag(A)
1760
1761      subroutine C3dB_cr_imag(nb,A,C)
1762      implicit none
1763      integer    nb
1764      complex*16 A(*)
1765      real*8     C(*)
1766
1767#include "C3dB.fh"
1768
1769      integer i
1770
1771!$OMP DO
1772      do i=1,n2ft3d_map(nb)
1773         C(i) = dimag(A(i))
1774      end do
1775!$OMP END DO
1776      return
1777      end
1778
1779
1780
1781
1782*     ***********************************
1783*     *                                 *
1784*     *        C3dB_ccr_Mul             *
1785*     *                                 *
1786*     ***********************************
1787
1788*  This routine performs the operation  C = dble(A * B)
1789
1790      subroutine C3dB_ccr_Mul(nb,A,B,C)
1791      implicit none
1792      integer    nb
1793      complex*16 A(*)
1794      complex*16 B(*)
1795      real*8     C(*)
1796
1797#include "C3dB.fh"
1798
1799      integer i
1800
1801!$OMP DO
1802      do i=1,n2ft3d_map(nb)
1803         C(i) = dble(A(i))*dble(B(i)) + dimag(A(i))*dimag(B(i))
1804      end do
1805!$OMP END DO
1806      return
1807      end
1808
1809
1810
1811*     ***********************************
1812*     *					*
1813*     *	       C3dB_rr_Sqr	 	*
1814*     *					*
1815*     ***********************************
1816
1817      subroutine C3dB_rr_Sqr(nb,A,C)
1818      implicit none
1819      integer nb
1820      real*8 A(*)
1821      real*8 C(*)
1822
1823#include "C3dB.fh"
1824
1825      integer i
1826
1827!$OMP DO
1828      do i=1,n2ft3d_map(nb)
1829         C(i) = A(i)**2
1830      end do
1831!$OMP END DO
1832      return
1833      end
1834
1835*     ***********************************
1836*     *					*
1837*     *	       C3dB_rr_Sqrt	 	*
1838*     *					*
1839*     ***********************************
1840
1841      subroutine C3dB_rr_Sqrt(nb,A,C)
1842      implicit none
1843      integer nb
1844      real*8 A(*)
1845      real*8 C(*)
1846
1847#include "C3dB.fh"
1848
1849      integer i
1850
1851!$OMP DO
1852      do i=1,n2ft3d_map(nb)
1853         C(i) = dsqrt(A(i))
1854      end do
1855!$OMP END DO
1856      return
1857      end
1858
1859
1860
1861
1862*     ***********************************
1863*     *					*
1864*     *	   C3dB_c_transpose_jk_init	*
1865*     *					*
1866*     ***********************************
1867
1868      subroutine C3dB_c_transpose_jk_init(nb)
1869      implicit none
1870      integer nb
1871
1872#include "bafdecls.fh"
1873#include "errquit.fh"
1874#include "C3dB.fh"
1875
1876c     integer iq_to_i1((NFFT1/2+1)*NFFT2*NSLABS)
1877c     integer iq_to_i2((NFFT1/2+1)*NFFT2*NSLABS)
1878c     integer i1_start(NFFT3+1)
1879c     integer i2_start(NFFT3+1)
1880      integer iq_to_i1(2,NBLOCKS)
1881      integer iq_to_i2(2,NBLOCKS)
1882      integer i1_start(2,NBLOCKS)
1883      integer i2_start(2,NBLOCKS)
1884      common / c_trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start
1885
1886
1887
1888*     **** local variables ****
1889      integer proc_to,proc_from
1890      integer pto,qto,np,taskid
1891      integer pfrom,qfrom
1892      integer phere,qhere
1893      integer index1,index2,itmp
1894      integer i,j,k,it
1895      logical value
1896
1897*     **** external functions ****
1898
1899*     **** allocate c_trans_blk common block ****
1900      value = BA_alloc_get(mt_int,(nx(nb)*ny(nb)*nq(nb)),
1901     >                     'iq_to_i1',iq_to_i1(2,nb),iq_to_i1(1,nb))
1902      value = BA_alloc_get(mt_int,(nx(nb)*ny(nb)*nq(nb)),
1903     >                     'iq_to_i2',iq_to_i2(2,nb),iq_to_i2(1,nb))
1904
1905      value = BA_alloc_get(mt_int,(nz(nb)+1),
1906     >                     'i1_start',i1_start(2,nb),i1_start(1,nb))
1907      value = BA_alloc_get(mt_int,(nz(nb)+1),
1908     >                     'i2_start',i2_start(2,nb),i2_start(1,nb))
1909
1910      call Parallel3d_taskid_i(taskid)
1911      call Parallel3d_np_i(np)
1912
1913      index1 = 1
1914      index2 = 1
1915      do it=0,np-1
1916         proc_to   = mod(taskid+it,np)
1917         proc_from = mod(taskid-it+np,np)
1918         int_mb(i1_start(1,nb)+it) = index1
1919         int_mb(i2_start(1,nb)+it) = index2
1920
1921         do k=1,nz(nb)
1922         do j=1,ny(nb)
1923
1924*           **** packing scheme ****
1925            call C3dB_ktoqp(nb,k,qhere,phere)
1926            call C3dB_ktoqp(nb,j,qto,pto)
1927            if ((phere.eq.taskid).and.(pto.eq.proc_to)) then
1928               do i=1,nx(nb)
1929                  itmp = i + (j-1)*nx(nb)
1930     >                     + (qhere-1)*nx(nb)*ny(nb)
1931                  int_mb(iq_to_i1(1,nb)+itmp-1) = index1
1932                  index1 = index1 + 1
1933               end do
1934            end if
1935
1936*           **** unpacking scheme ****
1937            call C3dB_ktoqp(nb,j,qhere,phere)
1938            call C3dB_ktoqp(nb,k,qfrom,pfrom)
1939            if ((phere.eq.taskid).and.(pfrom.eq.proc_from)) then
1940               do i=1,nx(nb)
1941                  itmp = i + (k-1)*nx(nb)
1942     >                     + (qhere-1)*nx(nb)*ny(nb)
1943                  int_mb(iq_to_i2(1,nb)+itmp-1) = index2
1944                  index2 = index2 + 1
1945               end do
1946            end if
1947         end do
1948         end do
1949      end do
1950      int_mb(i1_start(1,nb)+np) = index1
1951      int_mb(i2_start(1,nb)+np) = index2
1952
1953      return
1954      end
1955
1956
1957*     *******************************************
1958*     *						*
1959*     *	       C3dB_r_FormatWrite_reverse	*
1960*     *						*
1961*     *******************************************
1962
1963      subroutine C3dB_r_FormatWrite_reverse(nb,iunit,A,tmp)
1964      implicit none
1965      integer nb
1966      integer iunit
1967      real*8     A(*)
1968      real*8     tmp(*)
1969
1970#include "C3dB.fh"
1971#include "tcgmsg.fh"
1972#include "msgtypesf.h"
1973
1974      integer rcv_len,rcv_proc
1975
1976
1977*     *** local variables ***
1978      integer MASTER,taskid
1979      parameter(MASTER=0)
1980      integer p_from, p_here,q
1981      integer i,j,k,index
1982      integer dest,source,status,msglen,idum
1983
1984      call Parallel3d_taskid_i(taskid)
1985
1986      !**********************
1987      !**** slab mapping ****
1988      !**********************
1989      if (mapping.eq.1) then
1990
1991*     **** master node reads from file and distributes ****
1992      if (taskid.eq.MASTER) then
1993         do i=1,nx(nb)
1994         do j=1,ny(nb)
1995
1996            do k=1,nz(nb)
1997              call C3dB_ktoqp(nb,k,q,p_from)
1998              if (p_from.eq.MASTER) then
1999                 index = (q-1)*(nx(nb))*ny(nb)
2000     >                 + (j-1)*(nx(nb)) + i
2001                 tmp(k) = A(index)
2002              else
2003                 msglen  = 1
2004                 status  = msglen
2005                 source  = p_from
2006                 idum = -999
2007
2008                 call SND(9+MSGINT,idum,mitob(msglen),source,1)
2009                 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len,
2010     >                         source,rcv_proc,1)
2011
2012              end if
2013            end do
2014            write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb))
2015
2016         end do
2017         end do
2018
2019*     **** not master node ****
2020      else
2021         do i=1,nx(nb)
2022         do j=1,ny(nb)
2023
2024            do k=1,nz(nb)
2025              call C3dB_ktoqp(nb,k,q,p_here)
2026              if (p_here.eq.taskid) then
2027
2028                 index = (q-1)*(nx(nb))*ny(nb)
2029     >                 + (j-1)*(nx(nb)) + i
2030                 tmp(1) = A(index)
2031
2032                 msglen  = 1
2033                 dest    = MASTER
2034
2035                 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len,
2036     >                         dest,rcv_proc,1)
2037                 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
2038
2039              end if
2040            end do
2041
2042         end do
2043         end do
2044      end if
2045
2046      !*************************
2047      !**** hilbert mapping ****
2048      !*************************
2049      else
2050
2051
2052*     **** master node reads from file and distributes ****
2053      if (taskid.eq.MASTER) then
2054         do i=1,nx(nb)
2055         do j=1,ny(nb)
2056
2057            do k=1,nz(nb)
2058              call C3dB_ijktoindex2p(nb,i,j,k,index,p_from)
2059              if (p_from.eq.MASTER) then
2060                 tmp(k) = A(index)
2061              else
2062                 msglen  = 1
2063                 status  = msglen
2064                 source  = p_from
2065                 idum = -999
2066
2067                 call SND(9+MSGINT,idum,mitob(msglen),source,1)
2068                 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len,
2069     >                         source,rcv_proc,1)
2070              end if
2071            end do
2072            write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb))
2073
2074         end do
2075         end do
2076
2077*     **** not master node ****
2078      else
2079         do i=1,nx(nb)
2080         do j=1,ny(nb)
2081
2082            do k=1,nz(nb)
2083              call C3dB_ijktoindex2p(nb,i,j,k,index,p_here)
2084
2085              if (p_here.eq.taskid) then
2086
2087                 tmp(1) = A(index)
2088
2089                 msglen  = 1
2090                 dest    = MASTER
2091                 rcv_proc = MASTER
2092                 rcv_len  = mitob(1)
2093                 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len,
2094     >                         dest,rcv_proc,1)
2095                 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1)
2096              end if
2097            end do
2098
2099         end do
2100         end do
2101      end if
2102
2103      end if
2104
2105*     **** wait ****
2106      return
2107      end
2108
2109
2110
2111*     ***********************************
2112*     *					*
2113*     *	         C3dB_cc_dot  	 	*
2114*     *					*
2115*     ***********************************
2116
2117      subroutine C3dB_cc_dot(nb,A,B,sumall)
2118      implicit none
2119      integer nb
2120      real*8 A(*)
2121      real*8 B(*)
2122      real*8     sumall
2123
2124#include "C3dB.fh"
2125
2126      integer np
2127      real*8  sum
2128
2129
2130*     **** external functions ****
2131      real*8 ddot
2132      external ddot
2133
2134      call nwpw_timing_start(2)
2135
2136      call Parallel3d_np_i(np)
2137
2138*     **** sum up dot product on this node ****
2139      sum = ddot(nfft3d_map(nb),A(1),2,B(1),2)
2140     >    + ddot(nfft3d_map(nb),A(2),2,B(2),2)
2141
2142
2143
2144*     **** add up sums from other nodes ****
2145      if (np.gt.1) then
2146         call C3dB_SumAll(sum)
2147      end if
2148
2149      call nwpw_timing_end(2)
2150
2151      sumall = sum
2152      return
2153      end
2154
2155*     ***********************************
2156*     *					*
2157*     *	         C3dB_cc_idot  	 	*
2158*     *					*
2159*     ***********************************
2160
2161      subroutine C3dB_cc_idot(nb,A,B,sumall)
2162      implicit none
2163      integer nb
2164      real*8 A(*)
2165      real*8 B(*)
2166      real*8     sumall
2167
2168#include "C3dB.fh"
2169
2170      real*8  sum
2171
2172
2173      real*8   ddot
2174      external ddot
2175
2176      call nwpw_timing_start(2)
2177
2178*     **** sum up dot product on this node ****
2179      sum = ddot(nfft3d_map(nb),A(1),2,B(1),2)
2180     >    + ddot(nfft3d_map(nb),A(2),2,B(2),2)
2181
2182*     **** do not add up sums from other nodes ****
2183      call nwpw_timing_end(2)
2184
2185      sumall = sum
2186      return
2187      end
2188
2189
2190      subroutine C3dB_bb_idot(nb,A,B,sumall)
2191      implicit none
2192      integer nb
2193      real*8 A(*)
2194      real*8 B(*)
2195      real*8     sumall
2196
2197#include "C3dB.fh"
2198
2199      real*8  sum
2200
2201
2202      real*8   ddot
2203      external ddot
2204
2205      call nwpw_timing_start(2)
2206
2207*     **** sum up dot product on this node ****
2208      sum = ddot(n2ft3d_map(nb),A(1),2,B(1),2)
2209     >    + ddot(n2ft3d_map(nb),A(2),2,B(2),2)
2210
2211*     **** do not add up sums from other nodes ****
2212      call nwpw_timing_end(2)
2213
2214      sumall = sum
2215      return
2216      end
2217
2218
2219
2220*     ***********************************
2221*     *					*
2222*     *	         C3dB_rr_dot  	 	*
2223*     *					*
2224*     ***********************************
2225
2226      subroutine C3dB_rr_dot(nb,A,B,sumall)
2227      implicit none
2228      integer nb
2229      real*8  A(*)
2230      real*8  B(*)
2231      real*8  sumall
2232
2233#include "C3dB.fh"
2234
2235      integer np
2236      real*8  sum
2237
2238      real*8   ddot
2239      external ddot
2240
2241      call Parallel3d_np_i(np)
2242
2243*     **** sum up dot product on this node ****
2244      sum = ddot(n2ft3d_map(nb),A,1,B,1)
2245
2246*     **** add up sums from other nodes ****
2247      if (np.gt.1) then
2248         call C3dB_SumAll(sum)
2249      end if
2250
2251      sumall = sum
2252      return
2253      end
2254
2255
2256*     ***********************************
2257*     *                                 *
2258*     *          C3dB_tt_dot            *
2259*     *                                 *
2260*     ***********************************
2261
2262      subroutine C3dB_tt_dot(nb,A,B,sumall)
2263      implicit none
2264      integer nb
2265      real*8  A(*)
2266      real*8  B(*)
2267      real*8  sumall
2268
2269#include "C3dB.fh"
2270
2271      integer np
2272      real*8  sum
2273
2274      real*8   ddot
2275      external ddot
2276
2277      call Parallel3d_np_i(np)
2278
2279*     **** sum up dot product on this node ****
2280      sum = ddot(nfft3d_map(nb),A,1,B,1)
2281
2282*     **** add up sums from other nodes ****
2283      if (np.gt.1) then
2284         call C3dB_SumAll(sum)
2285      end if
2286
2287      sumall = sum
2288      return
2289      end
2290
2291*     ***********************************
2292*     *                                 *
2293*     *          C3dB_tt_idot           *
2294*     *                                 *
2295*     ***********************************
2296
2297      subroutine C3dB_tt_idot(nb,A,B,sumall)
2298      implicit none
2299      integer nb
2300      real*8  A(*)
2301      real*8  B(*)
2302      real*8  sumall
2303
2304#include "C3dB.fh"
2305
2306      integer np
2307      real*8  sum
2308
2309      real*8   ddot
2310      external ddot
2311
2312      call Parallel3d_np_i(np)
2313
2314*     **** sum up dot product on this node ****
2315      sum = ddot(nfft3d_map(nb),A,1,B,1)
2316
2317c*     **** add up sums from other nodes ****
2318c      if (np.gt.1) then
2319c         call C3dB_SumAll(sum)
2320c      end if
2321
2322      sumall = sum
2323      return
2324      end
2325
2326
2327*     ***********************************
2328*     *					*
2329*     *	         C3dB_rr_idot  	 	*
2330*     *					*
2331*     ***********************************
2332
2333      subroutine C3dB_rr_idot(nb,A,B,sumall)
2334      implicit none
2335      integer nb
2336      real*8  A(*)
2337      real*8  B(*)
2338      real*8  sumall
2339
2340#include "C3dB.fh"
2341
2342      integer np
2343      real*8  sum
2344
2345      real*8   ddot
2346      external ddot
2347
2348c      call Parallel3d_np_i(np)
2349
2350*     **** sum up dot product on this node ****
2351      sum = ddot(n2ft3d_map(nb),A,1,B,1)
2352
2353*     **** add up sums from other nodes ****
2354*     if (np.gt.1) then
2355*        call C3dB_SumAll(sum)
2356*     end if
2357
2358      sumall = sum
2359      return
2360      end
2361
2362
2363
2364*     ***********************************
2365*     *					*
2366*     *	         C3dB_cc_Mul  	 	*
2367*     *					*
2368*     ***********************************
2369
2370      subroutine C3dB_cc_Mul(nb,A,B,C)
2371      implicit none
2372      integer    nb
2373      complex*16 A(*)
2374      complex*16 B(*)
2375      complex*16 C(*)
2376
2377#include "C3dB.fh"
2378
2379      integer i
2380
2381!$OMP DO
2382      do i=1,nfft3d_map(nb)
2383         C(i) = dconjg(A(i)) * B(i)
2384      end do
2385!$OMP END DO
2386
2387      return
2388      end
2389
2390
2391
2392      subroutine C3dB_bb_Mul(nb,A,B,C)
2393      implicit none
2394      integer    nb
2395      complex*16 A(*)
2396      complex*16 B(*)
2397      complex*16 C(*)
2398#include "C3dB.fh"
2399      integer i
2400!$OMP DO
2401      do i=1,n2ft3d_map(nb)
2402         C(i) = dconjg(A(i)) * B(i)
2403      end do
2404!$OMP END DO
2405      return
2406      end
2407
2408
2409      subroutine C3dB_bb_ncMul(nb,A,B,C)
2410      implicit none
2411      integer    nb
2412      complex*16 A(*)
2413      complex*16 B(*)
2414      complex*16 C(*)
2415#include "C3dB.fh"
2416      integer i
2417!$OMP DO
2418      do i=1,n2ft3d_map(nb)
2419         C(i) = A(i) * B(i)
2420      end do
2421!$OMP END DO
2422      return
2423      end
2424
2425
2426
2427*     ***********************************
2428*     *                                 *
2429*     *          C3dB_cc_Mul2           *
2430*     *                                 *
2431*     ***********************************
2432
2433      subroutine C3dB_cc_Mul2(nb,A,B)
2434      implicit none
2435      integer    nb
2436      complex*16 A(*)
2437      complex*16 B(*)
2438
2439#include "C3dB.fh"
2440
2441      integer i
2442
2443!$OMP DO
2444      do i=1,nfft3d_map(nb)
2445         B(i) = dconjg(A(i)) * B(i)
2446      end do
2447!$OMP END DO
2448
2449      return
2450      end
2451
2452
2453*     ***********************************
2454*     *                                 *
2455*     *          C3dB_cc_Mul2c          *
2456*     *                                 *
2457*     ***********************************
2458
2459      subroutine C3dB_cc_Mul2c(nb,A,B)
2460      implicit none
2461      integer    nb
2462      complex*16 A(*)
2463      complex*16 B(*)
2464
2465#include "C3dB.fh"
2466
2467      integer i
2468
2469!$OMP DO
2470      do i=1,nfft3d_map(nb)
2471         B(i) = dconjg(B(i)) * A(i)
2472      end do
2473!$OMP END DO
2474
2475      return
2476      end
2477
2478
2479
2480
2481      subroutine C3dB_bb_Mul2c(nb,A,B)
2482      implicit none
2483      integer    nb
2484      complex*16 A(*)
2485      complex*16 B(*)
2486
2487#include "C3dB.fh"
2488
2489      integer i
2490
2491!$OMP DO
2492      do i=1,n2ft3d_map(nb)
2493         B(i) = dconjg(B(i)) * A(i)
2494      end do
2495!$OMP END DO
2496
2497      return
2498      end
2499
2500
2501*     ***********************************
2502*     *					*
2503*     *	         C3dB_lc_Mask  	 	*
2504*     *					*
2505*     ***********************************
2506
2507      subroutine C3dB_lc_Mask(nb,masker,A)
2508      implicit none
2509      integer    nb
2510      logical    masker(*)
2511      complex*16 A(*)
2512
2513#include "C3dB.fh"
2514
2515      integer i
2516
2517!$OMP DO
2518      do i=1,nfft3d_map(nb)
2519         if (masker(i)) A(i) = dcmplx(0.0d0,0.0d0)
2520      end do
2521!$OMP END DO
2522      return
2523      end
2524
2525*     ***********************************
2526*     *					*
2527*     *	         C3dB_lr_Mask  	 	*
2528*     *					*
2529*     ***********************************
2530
2531      subroutine C3dB_lr_Mask(nb,masker,A)
2532      implicit none
2533      integer   nb
2534      logical   masker(*)
2535      real*8    A(*)
2536
2537#include "C3dB.fh"
2538
2539      integer i
2540
2541!$OMP DO
2542      do i=1,nfft3d_map(nb)
2543         if (masker(i)) A(i) = 0.0d0
2544      end do
2545!$OMP END DO
2546      return
2547      end
2548
2549
2550*     ***********************************
2551*     *					*
2552*     *	         C3dB_rc_Mul  	 	*
2553*     *					*
2554*     ***********************************
2555
2556      subroutine C3dB_rc_Mul(nb,A,B,C)
2557      implicit none
2558      integer    nb
2559      real*8     A(*)
2560      complex*16 B(*)
2561      complex*16 C(*)
2562
2563#include "C3dB.fh"
2564
2565      integer i
2566
2567!$OMP DO
2568      do i=1,nfft3d_map(nb)
2569         C(i) = A(i) * B(i)
2570      end do
2571!$OMP END DO
2572
2573      return
2574      end
2575
2576*     ***********************************
2577*     *					*
2578*     *	         C3dB_rr_Mul  	 	*
2579*     *					*
2580*     ***********************************
2581
2582      subroutine C3dB_rr_Mul(nb,A,B,C)
2583      implicit none
2584      integer nb
2585      real*8 A(*)
2586      real*8 B(*)
2587      real*8 C(*)
2588
2589#include "C3dB.fh"
2590
2591      integer i
2592
2593!$OMP DO
2594      do i=1,n2ft3d_map(nb)
2595         C(i) = A(i) * B(i)
2596      end do
2597!$OMP END DO
2598
2599      return
2600      end
2601
2602*     ***********************************
2603*     *                                 *
2604*     *          C3dB_rr_Mul2           *
2605*     *                                 *
2606*     ***********************************
2607
2608      subroutine C3dB_rr_Mul2(nb,A,B)
2609      implicit none
2610      integer nb
2611      real*8 A(*)
2612      real*8 B(*)
2613
2614#include "C3dB.fh"
2615
2616      integer i
2617
2618!$OMP DO
2619      do i=1,n2ft3d_map(nb)
2620         B(i) = B(i) * A(i)
2621      end do
2622!$OMP END DO
2623
2624      return
2625      end
2626
2627
2628
2629*     ***********************************
2630*     *					*
2631*     *	         C3dB_cc_Sum  	 	*
2632*     *					*
2633*     ***********************************
2634
2635      subroutine C3dB_cc_Sum(nb,A,B,C)
2636      implicit none
2637      integer    nb
2638      complex*16 A(*)
2639      complex*16 B(*)
2640      complex*16 C(*)
2641
2642#include "C3dB.fh"
2643
2644      integer i
2645
2646!$OMP DO
2647      do i=1,nfft3d_map(nb)
2648         C(i) = A(i) + B(i)
2649      end do
2650!$OMP END DO
2651
2652      return
2653      end
2654
2655
2656*     ***********************************
2657*     *                                 *
2658*     *          C3dB_cc_Sum2            *
2659*     *                                 *
2660*     ***********************************
2661
2662      subroutine C3dB_cc_Sum2(nb,A,B)
2663      implicit none
2664      integer    nb
2665      complex*16 A(*)
2666      complex*16 B(*)
2667
2668#include "C3dB.fh"
2669
2670      integer i
2671
2672!$OMP DO
2673      do i=1,nfft3d_map(nb)
2674         B(i) = B(i) + A(i)
2675      end do
2676!$OMP END DO
2677
2678      return
2679      end
2680
2681
2682      subroutine C3dB_bb_Sum2(nb,A,B)
2683      implicit none
2684      integer    nb
2685      complex*16 A(*)
2686      complex*16 B(*)
2687
2688#include "C3dB.fh"
2689
2690      integer i
2691
2692!$OMP DO
2693      do i=1,n2ft3d_map(nb)
2694         B(i) = B(i) + A(i)
2695      end do
2696!$OMP END DO
2697
2698      return
2699      end
2700
2701
2702
2703*     ***********************************
2704*     *					*
2705*     *	         C3dB_rc_Sum  	 	*
2706*     *					*
2707*     ***********************************
2708
2709      subroutine C3dB_rc_Sum(nb,A,B,C)
2710      implicit none
2711      integer    nb
2712      real*8     A(*)
2713      complex*16 B(*)
2714      complex*16 C(*)
2715
2716#include "C3dB.fh"
2717
2718      integer i
2719
2720!$OMP DO
2721      do i=1,n2ft3d_map(nb)
2722         C(i) = A(i) + B(i)
2723      end do
2724!$OMP END DO
2725
2726      return
2727      end
2728
2729*     ***********************************
2730*     *                                 *
2731*     *          C3dB_rc_Sum2           *
2732*     *                                 *
2733*     ***********************************
2734
2735      subroutine C3dB_rc_Sum2(nb,A,B)
2736      implicit none
2737      integer    nb
2738      real*8     A(*)
2739      complex*16 B(*)
2740
2741#include "C3dB.fh"
2742
2743      integer i
2744
2745!$OMP DO
2746      do i=1,n2ft3d_map(nb)
2747         B(i) = B(i) + A(i)
2748      end do
2749!$OMP END DO
2750
2751      return
2752      end
2753
2754
2755
2756
2757*     ***********************************
2758*     *					*
2759*     *	         C3dB_rr_Sum  	 	*
2760*     *					*
2761*     ***********************************
2762
2763      subroutine C3dB_rr_Sum(nb,A,B,C)
2764      implicit none
2765      integer nb
2766      real*8  A(*)
2767      real*8  B(*)
2768      real*8  C(*)
2769
2770#include "C3dB.fh"
2771
2772      integer i
2773
2774!$OMP DO
2775      do i=1,n2ft3d_map(nb)
2776         C(i) = A(i) + B(i)
2777      end do
2778!$OMP END DO
2779
2780      return
2781      end
2782
2783
2784*     ***********************************
2785*     *                                 *
2786*     *          C3dB_rr_Sum2           *
2787*     *                                 *
2788*     ***********************************
2789
2790      subroutine C3dB_rr_Sum2(nb,A,B)
2791      implicit none
2792      integer nb
2793      real*8  A(*)
2794      real*8  B(*)
2795
2796#include "C3dB.fh"
2797
2798      integer i
2799
2800!$OMP DO
2801      do i=1,n2ft3d_map(nb)
2802         B(i) = B(i) + A(i)
2803      end do
2804!$OMP END DO
2805
2806      return
2807      end
2808
2809
2810
2811
2812*     ***********************************
2813*     *					*
2814*     *	         C3dB_rrc_Sum  	 	*
2815*     *					*
2816*     ***********************************
2817
2818      subroutine C3dB_rrc_Sum(nb,A,B,C)
2819      implicit none
2820      integer    nb
2821      real*8     A(*)
2822      real*8     B(*)
2823      complex*16 C(*)
2824
2825#include "C3dB.fh"
2826
2827      integer i
2828
2829!$OMP DO
2830      do i=1,n2ft3d_map(nb)
2831         C(i) = dcmplx((A(i) + B(i)),0.0d0)
2832      end do
2833!$OMP END DO
2834
2835      return
2836      end
2837
2838
2839*     ***********************************
2840*     *                                 *
2841*     *          C3dB_cc_Sub2           *
2842*     *                                 *
2843*     ***********************************
2844
2845      subroutine C3dB_cc_Sub2(nb,B,C)
2846      implicit none
2847      integer    nb
2848      complex*16 B(*)
2849      complex*16 C(*)
2850
2851#include "C3dB.fh"
2852
2853      integer i
2854
2855      do i=1,nfft3d_map(nb)
2856         C(i) = C(i) - B(i)
2857      end do
2858
2859      return
2860      end
2861
2862      subroutine C3dB_bb_Sub2(nb,B,C)
2863      implicit none
2864      integer    nb
2865      complex*16 B(*)
2866      complex*16 C(*)
2867
2868#include "C3dB.fh"
2869
2870      integer i
2871
2872!$OMP DO
2873      do i=1,n2ft3d_map(nb)
2874         C(i) = C(i) - B(i)
2875      end do
2876!$OMP END DO
2877      return
2878      end
2879
2880
2881
2882
2883*     ***********************************
2884*     *					*
2885*     *	         C3dB_cc_Sub  	 	*
2886*     *					*
2887*     ***********************************
2888
2889      subroutine C3dB_cc_Sub(nb,A,B,C)
2890      implicit none
2891      integer    nb
2892      complex*16 A(*)
2893      complex*16 B(*)
2894      complex*16 C(*)
2895
2896#include "C3dB.fh"
2897
2898      integer i
2899
2900!$OMP DO
2901      do i=1,nfft3d_map(nb)
2902         C(i) = A(i) - B(i)
2903      end do
2904!$OMP END DO
2905
2906      return
2907      end
2908
2909
2910*     ***********************************
2911*     *					*
2912*     *	         C3dB_rr_Sub  	 	*
2913*     *					*
2914*     ***********************************
2915
2916      subroutine C3dB_rr_Sub(nb,A,B,C)
2917      implicit none
2918      integer nb
2919      real*8  A(*)
2920      real*8  B(*)
2921      real*8  C(*)
2922
2923#include "C3dB.fh"
2924
2925      integer i
2926
2927!$OMP DO
2928      do i=1,n2ft3d_map(nb)
2929         C(i) = A(i) - B(i)
2930      end do
2931!$OMP END DO
2932
2933      return
2934      end
2935
2936
2937
2938
2939*     ***********************************
2940*     *					*
2941*     *	         C3dB_cc_zaxpy 	 	*
2942*     *					*
2943*     ***********************************
2944
2945      subroutine C3dB_cc_zaxpy(nb,alpha,A,B)
2946      implicit none
2947      integer    nb
2948      complex*16 alpha
2949      complex*16 A(*)
2950      complex*16 B(*)
2951
2952#include "C3dB.fh"
2953
2954      integer i
2955
2956!$OMP DO
2957      do i=1,nfft3d_map(nb)
2958         B(i) = B(i) + alpha*A(i)
2959      end do
2960!$OMP END DO
2961
2962      return
2963      end
2964
2965
2966
2967*     ***********************************
2968*     *					*
2969*     *	         C3dB_cc_daxpy 	 	*
2970*     *					*
2971*     ***********************************
2972
2973      subroutine C3dB_cc_daxpy(nb,alpha,A,B)
2974      implicit none
2975      integer    nb
2976      real*8     alpha
2977      complex*16 A(*)
2978      complex*16 B(*)
2979
2980#include "C3dB.fh"
2981
2982      integer i
2983
2984!$OMP DO
2985      do i=1,nfft3d_map(nb)
2986         B(i) = B(i) + alpha*A(i)
2987      end do
2988!$OMP END DO
2989
2990      return
2991      end
2992
2993*     ***********************************
2994*     *					*
2995*     *	         C3dB_rr_daxpy 	 	*
2996*     *					*
2997*     ***********************************
2998
2999      subroutine C3dB_rr_daxpy(nb,alpha,A,B)
3000      implicit none
3001      integer nb
3002      real*8  alpha
3003      real*8  A(*)
3004      real*8  B(*)
3005
3006#include "C3dB.fh"
3007
3008      integer i
3009
3010!$OMP DO
3011      do i=1,n2ft3d_map(nb)
3012         B(i) = B(i) + alpha* A(i)
3013      end do
3014!$OMP END DO
3015
3016      return
3017      end
3018
3019*     ***********************************
3020*     *                                 *
3021*     *          C3dB_rr_Divide         *
3022*     *                                 *
3023*     ***********************************
3024
3025      subroutine C3dB_rr_Divide(nb,A,B,C)
3026      implicit none
3027      integer nb
3028      real*8 A(*)
3029      real*8 B(*)
3030      real*8 C(*)
3031
3032#include "C3dB.fh"
3033
3034      real*8 eta
3035      parameter (eta=1.0d-9)
3036
3037      integer i
3038
3039
3040!$OMP DO
3041      do i=1,n2ft3d_map(nb)
3042         if (dabs(B(i)) .le. eta) then
3043           C(i) = 0.0d0
3044         else
3045           C(i) = A(i) / B(i)
3046         end if
3047      end do
3048!$OMP END DO
3049
3050      return
3051      end
3052
3053
3054*     ***********************************
3055*     *                                 *
3056*     *          C3dB_cc_Divide2        *
3057*     *                                 *
3058*     ***********************************
3059
3060      subroutine C3dB_cc_Divide2(nb,A,B)
3061      implicit none
3062      integer nb
3063      complex*16 A(*)
3064      complex*16 B(*)
3065
3066#include "C3dB.fh"
3067
3068      real*8 eta,ar,br
3069      parameter (eta=1.0d-9)
3070
3071      integer i
3072
3073!$OMP DO
3074      do i=1,nfft3d_map(nb)
3075         ar = dble(A(i))
3076         br = dble(B(i))
3077         if (dabs(ar) .le. eta) then
3078           B(i) = dcmplx(0.0d0,0.0d0)
3079         else
3080           B(i) = dcmplx(br/ar,0.0d0)
3081         end if
3082      end do
3083!$OMP END DO
3084
3085      return
3086      end
3087
3088
3089*     ***********************************
3090*     *                                 *
3091*     *          C3dB_r_abs1             *
3092*     *                                 *
3093*     ***********************************
3094      subroutine C3dB_r_abs1(nb,A)
3095      implicit none
3096      integer nb
3097      real*8 A(*)
3098
3099#include "C3dB.fh"
3100
3101      integer i
3102
3103!$OMP DO
3104      do i=1,n2ft3d_map(nb)
3105         A(i) = dabs(A(i))
3106      end do
3107!$OMP END DO
3108
3109      return
3110      end
3111
3112
3113*     ***********************************
3114*     *                                 *
3115*     *         C3dB_c_abs1             *
3116*     *                                 *
3117*     ***********************************
3118      subroutine C3dB_c_abs1(nb,A)
3119      implicit none
3120      integer nb
3121      complex*16 A(*)
3122
3123#include "C3dB.fh"
3124
3125      integer i
3126
3127!$OMP DO
3128      do i=1,nfft3d_map(nb)
3129         A(i) = dcmplx(dabs(dble(A(i))),dabs(dimag(A(i))))
3130      end do
3131!$OMP END DO
3132
3133      return
3134      end
3135
3136
3137*     ***********************************
3138*     *                                 *
3139*     *          C3dB_r_thresh          *
3140*     *                                 *
3141*     ***********************************
3142
3143      subroutine C3dB_r_thresh(nb,eta,A)
3144      implicit none
3145      integer nb
3146      real*8 eta
3147      real*8 A(*)
3148
3149#include "C3dB.fh"
3150
3151      integer i
3152
3153!$OMP DO
3154      do i=1,n2ft3d_map(nb)
3155         if (dabs(A(i)).lt.eta) A(i) = 0.0d0
3156      end do
3157!$OMP END DO
3158
3159      return
3160      end
3161
3162
3163
3164*     ***********************************
3165*     *                                 *
3166*     *          C3dB_rr_Minus          *
3167*     *                                 *
3168*     ***********************************
3169      subroutine C3dB_rr_Minus(nb,A,B,C)
3170      implicit none
3171      integer nb
3172      real*8  A(*)
3173      real*8  B(*)
3174      real*8  C(*)
3175
3176#include "C3dB.fh"
3177
3178      integer i
3179
3180!$OMP DO
3181      do i=1,n2ft3d_map(nb)
3182         C(i) = A(i) - B(i)
3183      end do
3184!$OMP END DO
3185
3186      return
3187      end
3188
3189
3190
3191
3192*     ***********************************
3193*     *					*
3194*     *	         C3dB_r_dsum  	 	*
3195*     *					*
3196*     ***********************************
3197
3198      subroutine C3dB_r_dsum(nb,A,sumall)
3199      implicit none
3200      integer nb
3201      real*8  A(*)
3202      real*8  sumall
3203
3204#include "C3dB.fh"
3205
3206      integer i,np
3207      real*8 sum
3208
3209      call Parallel3d_np_i(np)
3210
3211*     **** sum up dot product on this node ****
3212      sum = 0.0d0
3213      do i=1,n2ft3d_map(nb)
3214         sum = sum + A(i)
3215      end do
3216
3217*     **** add up sums from other nodes ****
3218      if (np.gt.1) then
3219        call C3dB_SumAll(sum)
3220      end if
3221
3222      sumall = sum
3223
3224      return
3225      end
3226
3227*     ***********************************
3228*     *					*
3229*     *	         C3dB_c_dsum  	 	*
3230*     *					*
3231*     ***********************************
3232
3233      subroutine C3dB_c_dsum(nb,A,sumall)
3234      implicit none
3235      integer nb
3236      complex*16  A(*)
3237      complex*16 sumall
3238
3239#include "C3dB.fh"
3240
3241      integer i,np
3242      complex*16 sum
3243
3244      call Parallel3d_np_i(np)
3245
3246*     **** sum up dot product on this node ****
3247      sum = dcmplx(0.0d0,0.0d0)
3248      do i=1,nfft3d_map(nb)
3249         sum = sum + A(i)
3250      end do
3251
3252*     **** add up sums from other nodes ****
3253      if (np.gt.1) then
3254        call C3dB_Vector_SumAll(2,sum)
3255      end if
3256
3257      sumall = sum
3258
3259      return
3260      end
3261
3262
3263
3264
3265*     ***********************************
3266*     *					*
3267*     *	     C3dB_cc_Vector_dot 	*
3268*     *					*
3269*     ***********************************
3270
3271      subroutine C3dB_cc_Vector_dot(nb,nnfft3d,nn,ne,A,B,sumall)
3272      implicit none
3273      integer    nb
3274      integer    nnfft3d,nn,ne
3275      real*8 A(*)
3276      real*8 B(*)
3277      real*8     sumall(nn,nn)
3278
3279#include "C3dB.fh"
3280
3281      integer np
3282      integer n,m,shift1,shift2
3283      real*8  sum
3284
3285      real*8   ddot
3286      external ddot
3287
3288      call nwpw_timing_start(2)
3289
3290      call Parallel3d_np_i(np)
3291
3292*     **** sum up dot product on this node ****
3293      do n=1,ne
3294      do m=n,ne
3295
3296        shift1 = 1 + (n-1)*nnfft3d*2
3297        shift2 = 1 + (m-1)*nnfft3d*2
3298
3299        sum = ddot(nfft3d_map(nb),A(shift1),2,B(shift2),2)
3300     >      + ddot(nfft3d_map(nb),A(shift1+1),2,B(shift2+1),2)
3301
3302         sumall(n,m) = sum
3303         sumall(m,n) = sum
3304      end do
3305      end do
3306
3307
3308*     **** add up sums from other nodes ****
3309      if (np.gt.1) then
3310         call C3dB_Vector_SumAll(nn*ne,sumall)
3311      end if
3312
3313      call nwpw_timing_end(2)
3314
3315      return
3316      end
3317
3318
3319
3320*     ***********************************
3321*     *					*
3322*     *	     C3dB_cc_Vector_ndot 	*
3323*     *					*
3324*     ***********************************
3325
3326      subroutine C3dB_cc_Vector_ndot(nb,nnfft3d,ne,A,B,sumall)
3327      implicit none
3328      integer    nb
3329      integer    nnfft3d,ne
3330      real*8 A(*)
3331      real*8 B(*)
3332      real*8     sumall(ne)
3333
3334#include "C3dB.fh"
3335
3336      integer np
3337      integer n,shift1
3338      real*8  sum
3339
3340
3341      real*8   ddot
3342      external ddot
3343
3344      call nwpw_timing_start(2)
3345
3346      call Parallel3d_np_i(np)
3347
3348*     **** sum up dot product on this node ****
3349      do n=1,ne
3350
3351        shift1 = 1 + (n-1)*nnfft3d*2
3352        sum = ddot(nfft3d_map(nb),A(shift1),2,B(1),2)
3353     >      + ddot(nfft3d_map(nb),A(shift1+1),2,B(2),2)
3354
3355        sumall(n) = sum
3356      end do
3357
3358*     **** add up sums from other nodes ****
3359      if (np.gt.1) then
3360         call C3dB_Vector_SumAll(ne,sumall)
3361      end if
3362
3363      call nwpw_timing_end(2)
3364      return
3365      end
3366
3367
3368
3369*     ***********************************
3370*     *                                 *
3371*     *          C3dB_ic_Mul            *
3372*     *                                 *
3373*     ***********************************
3374
3375      subroutine C3dB_ic_Mul(nb,A,B,C)
3376      implicit none
3377      integer    nb
3378      real*8     A(*)
3379      complex*16 B(*)
3380      complex*16 C(*)
3381
3382#include "C3dB.fh"
3383
3384      integer i
3385
3386!$OMP DO
3387      do i=1,nfft3d_map(nb)
3388            C(i) = dcmplx(0.0d0,A(i)) * B(i)
3389      end do
3390!$OMP END DO
3391
3392      return
3393      end
3394
3395
3396
3397*     ***********************************
3398*     *                                 *
3399*     *         C3dB_D3dB_r_Copy        *
3400*     *                                 *
3401*     ***********************************
3402
3403      subroutine C3dB_D3dB_r_Copy(nb,A_c3db,B_d3db)
3404      implicit none
3405      integer nb
3406      real*8     A_c3db(*)
3407      real*8     B_d3db(*)
3408
3409#include "C3dB.fh"
3410
3411      integer q,indx1,indx2,nqq
3412
3413      !**** slab mapping ****
3414      if (mapping.eq.1)  then
3415        nqq = nq(nb)*ny(nb)
3416      !**** hilbert mapping ****
3417      else
3418        nqq = nq1(nb)
3419      end if
3420
3421      indx1 = 1
3422      indx2 = 1
3423      do q=1,nqq
3424         call dcopy(nx(nb),A_c3db(indx1),1,B_d3dB(indx2),1)
3425         indx1 = indx1 + nx(nb)
3426         indx2 = indx2 + (nx(nb)+2)
3427      end do
3428      call dcopy(nqq,0.0d0,0,B_d3db(nx(nb)+1),nx(nb)+2)
3429      call dcopy(nqq,0.0d0,0,B_d3db(nx(nb)+2),nx(nb)+2)
3430
3431      return
3432      end
3433
3434
3435*     ***********************************
3436*     *                                 *
3437*     *         D3dB_C3dB_r_Copy        *
3438*     *                                 *
3439*     ***********************************
3440
3441      subroutine D3dB_C3dB_r_Copy(nb,A_d3db,B_c3db)
3442      implicit none
3443      integer nb
3444      real*8     A_d3db(*)
3445      real*8     B_c3db(*)
3446
3447#include "C3dB.fh"
3448
3449      integer q,indx1,indx2,nqq
3450
3451      !**** slab mapping ****
3452      if (mapping.eq.1)  then
3453        nqq = nq(nb)*ny(nb)
3454      !**** hilbert mapping ****
3455      else
3456        nqq = nq1(nb)
3457      end if
3458
3459      indx1 = 1
3460      indx2 = 1
3461      do q=1,nqq
3462         call dcopy(nx(nb),A_d3db(indx1),1,B_c3dB(indx2),1)
3463         indx1 = indx1 + (nx(nb)+2)
3464         indx2 = indx2 + nx(nb)
3465      end do
3466      return
3467      end
3468
3469
3470      subroutine C3dB_pfft_index1_copy(n,index,a,b)
3471      implicit none
3472      integer n
3473      integer index(*)
3474      complex*16  a(*),b(*)
3475      integer i
3476#ifndef CRAY
3477!DIR$ ivdep
3478#endif
3479!$OMP DO
3480      do i=1,n
3481        b(i) = a(index(i))
3482      end do
3483!$OMP END DO
3484      return
3485      end
3486
3487      subroutine C3dB_pfft_index2_copy(n,index,a,b)
3488      implicit none
3489      integer n
3490      integer index(*)
3491      complex*16  a(*),b(*)
3492      integer i
3493#ifndef CRAY
3494!DIR$ ivdep
3495#endif
3496!$OMP DO
3497      do i=1,n
3498        b(index(i)) = a(i)
3499      end do
3500!$OMP END DO
3501      return
3502      end
3503
3504      subroutine C3dB_pfft_index2_zero(n,index,a)
3505      implicit none
3506      integer n
3507      integer index(*)
3508      complex*16  a(*)
3509      integer i
3510#ifndef CRAY
3511!DIR$ ivdep
3512#endif
3513!$OMP DO
3514      do i=1,n
3515        a(index(i)) = 0.0d0
3516      end do
3517!$OMP END DO
3518      return
3519      end
3520
3521