1#define NBLOCKS 2 2* 3* $Id$ 4* 5 6* *********************************************************** 7* * * 8* * C3dB library * 9* * (NWChem implemenation, version 0.1) * 10* * * 11* * Author - Eric Bylaska * 12* * date - 11/16/01 * 13* * * 14* *********************************************************** 15* The C3dB (full complex distributed three-dimensional block) library 16*is to be used for handling three kinds of data structures. The first 17* data structure, denoted by "r", is a double precision array of 18* length (nx)*ny*nz. The second data structure, denoted by "c", is 19* a double complex array of length of (nx)*ny*nz. 20* 21* The two data structures are distributed across threads, p, in 22* the k (i.e. nz) dimension using a cyclic decomposition. So that 23* a "r" array A is defined as double precision A(nx,ny,nq) on 24* each thread. 25* 26* Where 27* np = number of threads 28* nq = ceil(nz/np). 29* 0 <= p < np 30* 1 <= q <= nq 31* 1 <= k <= nz 32* 33* The mapping of k -> q is defined as: 34* 35* k = ((q-1)*np + p) + 1 36* q = ((k-1) - p)/np + 1 37* p = (k-1) mod np 38* 39* Libraries used: mpi, blas, fftpack, and compressed_io 40* 41 42* common blocks used in this library: 43* 44* integer nq,nx,ny,nz 45* common / C3dB / nq,nx,ny,nz 46* 47* integer q_map(NFFT3),p_map(NFFT3),k_map(NFFT3) 48* common /C3dB_mapping / q_map,p_map,k_map 49* 50* integer iq_to_i1((NFFT1)*NFFT2*NSLABS) 51* integer iq_to_i2((NFFT1)*NFFT2*NSLABS) 52* integer i1_start(NPROCS+1) 53* integer i2_start(NPROCS+1) 54* common / c_trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 55 56 57* *********************************** 58* * * 59* * Mapping_Init_C3dB * 60* * * 61* *********************************** 62 63 subroutine Mapping_Init_C3dB(nb) 64 implicit none 65 integer nb 66 67#include "bafdecls.fh" 68#include "errquit.fh" 69#include "C3dB.fh" 70 71 72 integer k,q,p,j 73* integer kn 74 integer taskid,np 75 logical value 76 77 78 call Parallel3d_np_i(np) 79 call Parallel3d_taskid_i(taskid) 80 81 82* ************************** 83* ****** Slab mapping ****** 84* ************************** 85 if (mapping.eq.1) then 86 87 88* **** allocate q_map,p_map,k_map 89 value = BA_alloc_get(mt_int,nz(nb),'q_map',q_map(2,nb), 90 > q_map(1,nb)) 91 value = value.and.BA_alloc_get(mt_int,nz(nb),'p_map',p_map(2,nb), 92 > p_map(1,nb)) 93 value = value.and.BA_alloc_get(mt_int,nz(nb),'k_map',k_map(2,nb), 94 > k_map(1,nb)) 95 if (.not. value) 96 > call errquit('Mapping_init:out of heap memory',1, MA_ERR) 97 98 99 100* **************************** 101* ****** cyclic mapping ****** 102* **************************** 103 p = 0 104 q = 1 105 do k=1,nz(nb) 106 int_mb(q_map(1,nb)+k-1) = q 107 int_mb(p_map(1,nb)+k-1) = p 108 if (p .eq. taskid) nq(nb) = q 109 p = p+1 110 if (p .ge. np) then 111 p = 0 112 q = q + 1 113 end if 114 end do 115 116 do k=1,nz(nb) 117 if (int_mb(p_map(1,nb)+k-1) .eq. taskid) then 118 int_mb(k_map(1,nb)+int_mb(q_map(1,nb)+k-1)-1) = k 119 end if 120 end do 121 122 nfft3d(nb) = nx(nb)*ny(nb)*nq(nb) 123 n2ft3d(nb) = nfft3d(nb) 124 nfft3d_map(nb) = nfft3d(nb) 125 n2ft3d_map(nb) = n2ft3d(nb) 126 127 128* ****************************** 129* ****** Hilbert mappings ****** 130* ****************************** 131 else 132 133 134* **** allocate q_map1,p_map1,q_map2,p_map2,q_map3,p_map3 **** 135 value = BA_alloc_get(mt_int,ny(nb)*nz(nb), 136 > 'q_map1', 137 > q_map1(2,nb), 138 > q_map1(1,nb)) 139 value = value.and.BA_alloc_get(mt_int,ny(nb)*nz(nb), 140 > 'p_map1', 141 > p_map1(2,nb), 142 > p_map1(1,nb)) 143 144 value = value.and.BA_alloc_get(mt_int,nz(nb)*nx(nb), 145 > 'q_map2', 146 > q_map2(2,nb), 147 > q_map2(1,nb)) 148 value = value.and.BA_alloc_get(mt_int,nz(nb)*nx(nb), 149 > 'p_map2', 150 > p_map2(2,nb), 151 > p_map2(1,nb)) 152 153 value = value.and.BA_alloc_get(mt_int,ny(nb)*nx(nb), 154 > 'q_map3', 155 > q_map3(2,nb), 156 > q_map3(1,nb)) 157 value = value.and.BA_alloc_get(mt_int,ny(nb)*nx(nb), 158 > 'p_map3', 159 > p_map3(2,nb), 160 > p_map3(1,nb)) 161 if (.not. value) 162 > call errquit('Mapping_init:out of heap memory',1, MA_ERR) 163 164 165 !**** double grid map1 defined wrt to single grid **** 166 !**** makes expand and contract routines trivial parallel **** 167 if (mapping2d.eq.1) then 168 if (nb.eq.1) then 169 call hilbert2d_map(ny(nb),nz(nb),int_mb(p_map1(1,nb))) 170 end if 171 call hilbert2d_map(nz(nb),nx(nb),int_mb(p_map2(1,nb))) 172 call hilbert2d_map(nx(nb),ny(nb),int_mb(p_map3(1,nb))) 173 else 174 if (nb.eq.1) then 175 call hcurve_map(ny(nb),nz(nb),int_mb(p_map1(1,nb))) 176 end if 177 call hcurve_map(nz(nb),nx(nb),int_mb(p_map2(1,nb))) 178 call hcurve_map(nx(nb),ny(nb),int_mb(p_map3(1,nb))) 179 end if 180 181 182 183 !**** double grid map1 defined wrt to single grid **** 184 !**** makes expand and contract routines trivial parallel **** 185 if (nb.eq.1) then 186 call generate_map_indexes(taskid,np, 187 > ny(nb),nz(nb), 188 > int_mb(p_map1(1,nb)), 189 > int_mb(q_map1(1,nb)),nq1(nb)) 190 else 191 nq1(2) = 4*nq1(1) 192 call expand_hilbert2d(np,ny(1),nz(1), 193 > int_mb(p_map1(1,1)),int_mb(q_map1(1,1)), 194 > int_mb(p_map1(1,2)),int_mb(q_map1(1,2))) 195 end if 196 call generate_map_indexes(taskid,np, 197 > nz(nb),nx(nb), 198 > int_mb(p_map2(1,nb)), 199 > int_mb(q_map2(1,nb)),nq2(nb)) 200 call generate_map_indexes(taskid,np, 201 > nx(nb),ny(nb), 202 > int_mb(p_map3(1,nb)), 203 > int_mb(q_map3(1,nb)),nq3(nb)) 204 205c if (taskid.eq.0) then 206c write(*,*) taskid,"nq2=",nq2(nb), ny(nb)*nq2(nb) 207c write(*,*) taskid,"nq1=",nq1(nb), nx(nb)*nq1(nb) 208c write(*,*) taskid,"nq3=",nq3(nb), nz(nb)*nq3(nb) 209c write(*,*) 'hilbert map1 nb=',nb 210c do j=0,nz(nb)-1 211c write(*,'(A,80I4)') 'hilbert map:', 212c > (int_mb(p_map1(1,nb)+k+j*ny(nb)), k=0,ny(nb)-1) 213c end do 214c write(*,*) 215c write(*,*) 'hilbert map2 nb=',nb 216c do j=0,nx(nb)-1 217c write(*,'(A,80I4)') 'hilbert map:', 218c > (int_mb(p_map2(1,nb)+k+j*nz(nb)), k=0,nz(nb)-1) 219c end do 220c write(*,*) 221c write(*,*) 'hilbert map3 nb=',nb 222c do j=0,ny(nb)-1 223c write(*,'(A,80I4)') 'hilbert map:', 224c > (int_mb(p_map3(1,nb)+k+j*nx(nb)), k=0,nx(nb)-1) 225c end do 226c write(*,*) 227c end if 228 229 nfft3d(nb) = nx(nb)*nq1(nb) 230 if ((ny(nb)*nq2(nb)).gt.nfft3d(nb)) nfft3d(nb) = ny(nb)*nq2(nb) 231 if ((nz(nb)*nq3(nb)).gt.nfft3d(nb)) nfft3d(nb) = nz(nb)*nq3(nb) 232 n2ft3d(nb) = nfft3d(nb) 233 234 nfft3d_map(nb) = nz(nb)*nq3(nb) 235 n2ft3d_map(nb) = nx(nb)*nq1(nb) 236 237 end if 238 239 return 240 end 241 242* *********************************** 243* * * 244* * C3dB_end * 245* * * 246* *********************************** 247 subroutine C3dB_end(nb) 248 implicit none 249 integer nb 250 251#include "bafdecls.fh" 252#include "errquit.fh" 253#include "C3dB.fh" 254 255 256* *** hilbert tranpose data structure **** 257 integer h_iq_to_i1(2,6,NBLOCKS) 258 integer h_iq_to_i2(2,6,NBLOCKS) 259 integer h_i1_start(2,6,NBLOCKS) 260 integer h_i2_start(2,6,NBLOCKS) 261 common / c_trans_blk_ijk / h_iq_to_i1, 262 > h_iq_to_i2, 263 > h_i1_start, 264 > h_i2_start 265 266 integer iq_to_i1(2,NBLOCKS) 267 integer iq_to_i2(2,NBLOCKS) 268 integer i1_start(2,NBLOCKS) 269 integer i2_start(2,NBLOCKS) 270 common / c_trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 271 272#ifndef MPI 273 integer Nchannels(NBLOCKS) 274 integer channel_proc(2,NBLOCKS) 275 integer channel_type(2,NBLOCKS) 276 common / c_channel_blk / channel_proc,channel_type,Nchannels 277#endif 278 279 logical value 280 integer i 281 282 283 call C3dB_fft_end(nb) 284 value = .true. 285 286 !**** slab mappings **** 287 if (mapping.eq.1) then 288 value = value.and.BA_free_heap(q_map(2,nb)) 289 value = value.and.BA_free_heap(p_map(2,nb)) 290 value = value.and.BA_free_heap(k_map(2,nb)) 291 end if 292 293 !**** hilbert mappings **** 294 if (mapping.eq.2) then 295 value = value.and.BA_free_heap(q_map1(2,nb)) 296 value = value.and.BA_free_heap(p_map1(2,nb)) 297 value = value.and.BA_free_heap(q_map2(2,nb)) 298 value = value.and.BA_free_heap(p_map2(2,nb)) 299 value = value.and.BA_free_heap(q_map3(2,nb)) 300 value = value.and.BA_free_heap(p_map3(2,nb)) 301 end if 302 303 304 !**** slab transpose mappings **** 305 if (mapping.eq.1) then 306 value = value.and.BA_free_heap(i1_start(2,nb)) 307 value = value.and.BA_free_heap(i2_start(2,nb)) 308 value = value.and.BA_free_heap(iq_to_i1(2,nb)) 309 value = value.and.BA_free_heap(iq_to_i2(2,nb)) 310 end if 311 312 !**** hilbert transpose mappings **** 313 if (mapping.eq.2) then 314 do i=1,6 315 value = value.and.BA_free_heap(h_i1_start(2,i,nb)) 316 value = value.and.BA_free_heap(h_i2_start(2,i,nb)) 317 value = value.and.BA_free_heap(h_iq_to_i1(2,i,nb)) 318 value = value.and.BA_free_heap(h_iq_to_i2(2,i,nb)) 319 end do 320 end if 321 322 323 324#ifndef MPI 325 value = value.and.BA_free_heap(channel_proc(2,nb)) 326 value = value.and.BA_free_heap(channel_type(2,nb)) 327#endif 328 329 if (.not. value) 330 > call errquit('C3dB_end:freeing heap memory',0, MA_ERR) 331 return 332 end 333 334* *********************************** 335* * * 336* * C3dB_qtok * 337* * * 338* *********************************** 339 340 subroutine C3dB_qtok(nb,q,k) 341 implicit none 342 integer nb 343 integer q,k 344 345#include "bafdecls.fh" 346#include "C3dB.fh" 347 348 k = int_mb(k_map(1,nb)+q-1) 349 350 return 351 end 352 353* *********************************** 354* * * 355* * C3dB_ktoqp * 356* * * 357* *********************************** 358 359 subroutine C3dB_ktoqp(nb,k,q,p) 360 implicit none 361 integer nb 362 integer k,q,p 363 364#include "bafdecls.fh" 365#include "C3dB.fh" 366 367 q = int_mb(q_map(1,nb)+k-1) 368 p = int_mb(p_map(1,nb)+k-1) 369 return 370 end 371 372 373* *********************************** 374* * * 375* * C3dB_ijktoindexp * 376* * * 377* *********************************** 378 379 subroutine C3dB_ijktoindexp(nb,i,j,k,indx,p) 380 implicit none 381 integer nb 382 integer i,j,k 383 integer indx,p 384 385#include "bafdecls.fh" 386#include "C3dB.fh" 387 388 integer q 389 390 !**** slab mapping **** 391 if (mapping.eq.1) then 392 q = int_mb(q_map(1,nb)+k-1) 393 p = int_mb(p_map(1,nb)+k-1) 394 395 indx = i + (j-1)*(nx(nb)) + (q-1)*(nx(nb))*ny(nb) 396 397 !**** hilbert mapping **** 398 else 399 q = int_mb(q_map3(1,nb)+(i-1)+(j-1)*nx(nb)) 400 p = int_mb(p_map3(1,nb)+(i-1)+(j-1)*nx(nb)) 401 402 indx = k + (q-1)*nz(nb) 403 404 405 end if 406 407 return 408 end 409 410 411 412* *********************************** 413* * * 414* * C3dB_ijktoindex1p * 415* * * 416* *********************************** 417 418 subroutine C3dB_ijktoindex1p(nb,i,j,k,indx,p) 419 implicit none 420 integer nb 421 integer i,j,k 422 integer indx,p 423 424#include "bafdecls.fh" 425#include "C3dB.fh" 426 427 integer q 428 429 !**** slab mapping *** 430 if (mapping.eq.1) then 431 q = int_mb(q_map(1,nb)+j-1) 432 p = int_mb(p_map(1,nb)+j-1) 433 434 indx = i + (k-1)*nx(nb) + (q-1)*nx(nb)*nz(nb) 435 436 !**** hilbert mapping **** 437 else 438 q = int_mb(q_map2(1,nb)+(k-1)+(i-1)*(nz(nb))) 439 p = int_mb(p_map2(1,nb)+(k-1)+(i-1)*(nz(nb))) 440 441 indx = j + (q-1)*ny(nb) 442 end if 443 444 return 445 end 446 447 448 449 450* *********************************** 451* * * 452* * C3dB_ijktoindex2p * 453* * * 454* *********************************** 455 456 subroutine C3dB_ijktoindex2p(nb,i,j,k,indx,p) 457 implicit none 458 integer nb 459 integer i,j,k 460 integer indx,p 461 462#include "bafdecls.fh" 463#include "C3dB.fh" 464 465 466 integer q 467 468 !**** slab mapping **** 469 if (mapping.eq.1) then 470 q = int_mb(q_map(1,nb)+j-1) 471 p = int_mb(p_map(1,nb)+j-1) 472 473 indx = i + (k-1)*(nx(nb)) + (q-1)*(nx(nb))*ny(nb) 474 475 !**** hilbert mapping **** 476 else 477 q = int_mb(q_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 478 p = int_mb(p_map1(1,nb)+(j-1)+(k-1)*ny(nb)) 479 480 indx = i + (q-1)*nx(nb) 481 end if 482 483 484 return 485 end 486 487 488 489 490 491* *********************************** 492* * * 493* * C3dB_ijk_to_srqp * 494* * * 495* *********************************** 496 497 subroutine C3dB_ijk_to_srqp(nb,i,j,k, s,r,q,p) 498 implicit none 499 integer nb 500 integer i,j,k 501 integer s,r,q,p 502 503#include "bafdecls.fh" 504#include "C3dB.fh" 505 506 507c q = q_map(k) 508c p = p_map(k) 509 510 s = i 511 r = j 512 q = int_mb(q_map(1,nb)+k-1) 513 p = int_mb(p_map(1,nb)+k-1) 514 return 515 end 516 517 518 519 520* *********************************** 521* * * 522* * C3dB_nfft3d * 523* * * 524* *********************************** 525 526 subroutine C3dB_nfft3d(nb,nfft3d_out) 527 implicit none 528 integer nb 529 integer nfft3d_out 530 531#include "C3dB.fh" 532 533 nfft3d_out = nfft3d(nb) 534 return 535 end 536 537 538 539* *********************************** 540* * * 541* * C3dB_nfft3d_map * 542* * * 543* *********************************** 544 545 subroutine C3dB_nfft3d_map(nb,nfft3d_out) 546 implicit none 547 integer nb 548 integer nfft3d_out 549 550#include "C3dB.fh" 551 552 nfft3d_out = nfft3d_map(nb) 553 return 554 end 555 556 557* *********************************** 558* * * 559* * C3dB_n2ft3d * 560* * * 561* *********************************** 562 563 subroutine C3dB_n2ft3d(nb,n2ft3d_out) 564 implicit none 565 integer nb 566 integer n2ft3d_out 567 568#include "C3dB.fh" 569 570 n2ft3d_out = n2ft3d(nb) 571 return 572 end 573 574 575* *********************************** 576* * * 577* * C3dB_n2ft3d_map * 578* * * 579* *********************************** 580 581 subroutine C3dB_n2ft3d_map(nb,n2ft3d_out) 582 implicit none 583 integer nb 584 integer n2ft3d_out 585 586#include "C3dB.fh" 587 588 n2ft3d_out = n2ft3d_map(nb) 589 return 590 end 591 592 593* *********************************** 594* * * 595* * C3dB_nqq * 596* * * 597* *********************************** 598 599 subroutine C3dB_nqq(nb,nqtmp) 600 implicit none 601 integer nb 602 integer nqtmp 603 604#include "C3dB.fh" 605 606 !**** slab mapping **** 607 if (mapping.eq.1) then 608 nqtmp = ny(nb)*nq(nb) 609 !**** hilbert mapping **** 610 else 611 nqtmp = nq(nb) 612 end if 613 614 return 615 end 616 617 618 619* *********************************** 620* * * 621* * C3dB_nq * 622* * * 623* *********************************** 624 625 subroutine C3dB_nq(nb,nqtmp) 626 implicit none 627 integer nb 628 integer nqtmp 629 630#include "C3dB.fh" 631 632 nqtmp = nq(nb) 633 634 return 635 end 636 637* *********************************** 638* * * 639* * C3dB_nx * 640* * * 641* *********************************** 642 643 subroutine C3dB_nx(nb,nxtmp) 644 implicit none 645 integer nb 646 integer nxtmp 647 648#include "C3dB.fh" 649 650 nxtmp = nx(nb) 651 return 652 end 653 654* *********************************** 655* * * 656* * C3dB_ny * 657* * * 658* *********************************** 659 660 subroutine C3dB_ny(nb,nytmp) 661 implicit none 662 integer nb 663 integer nytmp 664 665#include "C3dB.fh" 666 667 nytmp = ny(nb) 668 return 669 end 670 671* *********************************** 672* * * 673* * C3dB_nz * 674* * * 675* *********************************** 676 677 subroutine C3dB_nz(nb,nztmp) 678 implicit none 679 integer nb 680 integer nztmp 681 682#include "C3dB.fh" 683 684 nztmp = nz(nb) 685 return 686 end 687 688 689* *********************************** 690* * * 691* * C3dB_Init * 692* * * 693* *********************************** 694 695 subroutine C3dB_Init(nb,nx_in,ny_in,nz_in,map_in) 696 implicit none 697 integer nb 698 integer nx_in,ny_in,nz_in 699 integer map_in 700 701#include "C3dB.fh" 702 703 !**** local variables **** 704 integer MASTER 705 parameter (MASTER=0) 706 integer taskid,np 707 708 call Parallel3d_np_i(np) 709 call Parallel_taskid(taskid) 710 711 712 !**** Make sure ngrid is consistent with mapping *** 713 if (map_in.eq.1) then 714 if ((np.gt.nz_in).or.(ny_in.ne.nz_in)) then 715 if (taskid.eq.MASTER) then 716 write(6,*) 'Error: for slab decomposition the', 717 > ' number of processors must ', 718 > ' be in the range ( 1 ...ngrid(3)=', 719 > nz_in,')' 720 write(6,*) ' and ngrid(2) == ngrid(3), ', 721 > ' ngrid(2)=',ny_in, 722 > ' ngrid(3)=',nz_in 723 end if 724 call errquit('C3dB_Init: mapping error',0,0) 725 end if 726 if (mod(nx_in,2).ne.0) then 727 if (taskid.eq.MASTER) then 728 write(6,*) 729 > 'Error: ngrid(1) must be even (ngrid(1) mod 2 == 0)' 730 write(6,*) 'Error: ngrid(1)=',nx_in 731 end if 732 call errquit('C3dB_Init: slab mapping error',0,0) 733 end if 734 end if 735 736 737 if (map_in.ge.2) then 738 if (np.gt.(ny_in*nz_in)) then 739 if (taskid.eq.MASTER) then 740 write(6,*) 'Error: np > MIN(ngrid(2)*ngrid(3),', 741 > ' ngrid(1)*ngrid(2),', 742 > ' ngrid(1)*ngrid(3))' 743 write(6,*) 'Error: np > ngrid(2)*ngrid(3)' 744 write(6,*) 'Error: for the Hilbert decomposition the', 745 > ' the number of processors must ', 746 > ' be in the range ( 1 ...', 747 > ny_in*nz_in,')' 748 end if 749 call errquit('C3dB_Init: Hilbert mapping error',0,0) 750 end if 751 if (np.gt.(nx_in*ny_in)) then 752 if (taskid.eq.MASTER) then 753 write(6,*) 'Error: np > MIN(ngrid(2)*ngrid(3),', 754 > ' ngrid(1)*ngrid(2),', 755 > ' ngrid(1)*ngrid(3))' 756 write(6,*) 'Error: np > ngrid(1)*ngrid(2)' 757 write(6,*) 'Error: for the Hilbert decomposition the', 758 > ' the number of processors must ', 759 > ' be in the range ( 1 ...', 760 > nx_in*ny_in,')' 761 end if 762 call errquit('C3dB_Init: Hilbert mapping error',0,0) 763 end if 764 if (np.gt.(nx_in*nz_in)) then 765 if (taskid.eq.MASTER) then 766 write(6,*) 'Error: np > MIN(ngrid(2)*ngrid(3),', 767 > ' ngrid(1)*ngrid(2),', 768 > ' ngrid(1)*ngrid(3))' 769 write(6,*) 'Error: np > ngrid(1)*ngrid(3)' 770 write(6,*) 'Error: for the Hilbert decomposition the', 771 > ' the number of processors must ', 772 > ' be in the range ( 1 ...', 773 > nx_in*nz_in,')' 774 end if 775 call errquit('C3dB_Init: Hilbert mapping error',0,0) 776 end if 777 if (mod(nx_in,2).ne.0) then 778 if (taskid.eq.MASTER) then 779 write(6,*) 780 > 'Error: ngrid(1) must be even (ngrid(1) mod 2 == 0)' 781 write(6,*) 'Error: ngrid(1)=',nx_in 782 end if 783 call errquit('C3dB_Init: Hilbert mapping error',0,0) 784 end if 785 end if 786 787 788* ***** initialize C3dB common block ***** 789 nx(nb) = nx_in 790 ny(nb) = ny_in 791 nz(nb) = nz_in 792 mapping = map_in 793 mapping2d = 1 794 if (mapping.eq.3) then 795 mapping = 2 796 mapping2d = 2 797 end if 798 799 800* **** do other initializations **** 801 call Mapping_Init_C3dB(nb) 802 if (mapping.eq.1) call C3dB_c_transpose_jk_init(nb) 803 if (mapping.eq.2) call C3dB_c_transpose_ijk_init(nb) 804 805#ifndef MPI 806 call C3dB_channel_init(nb) 807#endif 808 809 call C3dB_fft_init(nb) 810 811 return 812 end 813 814 815* *********************************** 816* * * 817* * C3dB_(c,r)_Zero * 818* * * 819* *********************************** 820 821 subroutine C3dB_c_Zero(nb,A) 822 implicit none 823 integer nb 824 complex*16 A(*) 825 826#include "C3dB.fh" 827 828 call dcopy(2*nfft3d_map(nb),0.0d0,0,A,1) 829 return 830 end 831 832 833 subroutine C3dB_r_Zero(nb,A) 834 implicit none 835 integer nb 836 real*8 A(*) 837 838 839#include "C3dB.fh" 840 841 call dcopy(n2ft3d_map(nb),0.0d0,0,A,1) 842 return 843 end 844 845 846 847* *********************************** 848* * * 849* * C3dB_(c,r)_Copy * 850* * * 851* *********************************** 852 853 subroutine C3dB_c_Copy(nb,A,B) 854 implicit none 855 integer nb 856 complex*16 A(*) 857 complex*16 B(*) 858 859#include "C3dB.fh" 860 861 call dcopy(2*nfft3d_map(nb),A,1,B,1) 862 return 863 end 864 865 subroutine C3dB_r_Copy(nb,A,B) 866 implicit none 867 integer nb 868 real*8 A(*) 869 real*8 B(*) 870 871#include "C3dB.fh" 872 873 call dcopy(n2ft3d_map(nb),A,1,B,1) 874 return 875 end 876 877 subroutine C3dB_t_Copy(nb,A,B) 878 implicit none 879 integer nb 880 real*8 A(*) 881 real*8 B(*) 882 883#include "C3dB.fh" 884 885 call dcopy(nfft3d_map(nb),A,1,B,1) 886 return 887 end 888 889* *********************************** 890* * * 891* * C3dB_ct_Copy * 892* * * 893* *********************************** 894 895 subroutine C3dB_ct_Copy(nb,A,B) 896 implicit none 897 integer nb 898 complex*16 A(*) 899 real*8 B(*) 900 901#include "C3dB.fh" 902 903 integer i 904 905 do i=1,nfft3d_map(nb) 906 B(i) = dble(A(i)) 907 end do 908 return 909 end 910 911* *********************************** 912* * * 913* * C3dB_tc_Copy * 914* * * 915* *********************************** 916 917 subroutine C3dB_tc_Copy(nb,A,B) 918 implicit none 919 integer nb 920 real*8 A(*) 921 complex*16 B(*) 922 923#include "C3dB.fh" 924 925 integer i 926 927!$OMP DO 928 do i=1,nfft3d_map(nb) 929 B(i) = dcmplx(A(i),0.0d0) 930 end do 931!$OMP END DO 932 return 933 end 934 935 936 937 938* *********************************** 939* * * 940* * C3dB_fft_init * 941* * * 942* *********************************** 943 944 subroutine C3dB_fft_init(nb) 945 implicit none 946 integer nb 947 948#include "bafdecls.fh" 949#include "errquit.fh" 950 951#include "C3dB.fh" 952 953 954 integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS) 955 common / C3dB_fft / tmpx,tmpy,tmpz 956 957 logical value 958 959 960 value = BA_alloc_get(mt_dcpl,(nfft3d(nb)), 961 > 'fttmpx',tmpx(2,nb),tmpx(1,nb)) 962 value = value.and. 963 > BA_alloc_get(mt_dcpl,(nfft3d(nb)), 964 > 'fttmpy',tmpy(2,nb),tmpy(1,nb)) 965 value = value.and. 966 > BA_alloc_get(mt_dcpl,(nfft3d(nb)), 967 > 'fttmpz',tmpz(2,nb),tmpz(1,nb)) 968 if (.not. value) 969 > call errquit('C3dB_fft_init:out of heap memory',0, MA_ERR) 970 971 972#ifdef MLIB 973 call z1dfft(dcpl_mb(tmpx(1,nb)),nx(nb), 974 > dcpl_mb(tmpx(1,nb)),-3,ierr) 975 call z1dfft(dcpl_mb(tmpx(1,nb)),ny(nb), 976 > dcpl_mb(tmpy(1,nb)),-3,ierr) 977 call z1dfft(dcpl_mb(tmpx(1,nb)),nz(nb), 978 > dcpl_mb(tmpz(1,nb)),-3,ierr) 979 980#else 981 call dcffti(nx(nb),dcpl_mb(tmpx(1,nb))) 982 call dcffti(ny(nb),dcpl_mb(tmpy(1,nb))) 983 call dcffti(nz(nb),dcpl_mb(tmpz(1,nb))) 984#endif 985 986 return 987 end 988 989 990* *********************************** 991* * * 992* * C3dB_fft_end * 993* * * 994* *********************************** 995 996 subroutine C3dB_fft_end(nb) 997 implicit none 998 integer nb 999 1000#include "bafdecls.fh" 1001#include "errquit.fh" 1002 1003#include "C3dB.fh" 1004 1005 1006 integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS) 1007 common / C3dB_fft / tmpx,tmpy,tmpz 1008 1009 logical value 1010 1011 value = BA_free_heap(tmpx(2,nb)) 1012 value = value.and.BA_free_heap(tmpy(2,nb)) 1013 value = value.and.BA_free_heap(tmpz(2,nb)) 1014 if (.not.value) 1015 > call errquit( 1016 > 'C3dB_fft_end:error deallocatingof heap memory',0, MA_ERR) 1017 1018 return 1019 end 1020 1021 1022 1023 1024 1025* *********************************** 1026* * * 1027* * C3dB_cr_fft3b * 1028* * * 1029* *********************************** 1030 1031 subroutine C3dB_cr_fft3b(nb,A) 1032 1033***************************************************** 1034* * 1035* This routine performs the operation of * 1036* a three dimensional complex to complex * 1037* inverse fft * 1038* A(nx,ny(nb),nz(nb)) <- FFT3^(-1)[A(kx,ky,kz)] * 1039* * 1040* Entry - * 1041* A: a column distribuded 3d block * 1042* tmp: tempory work space must be at * 1043* least the size of (complex) * 1044* (nfft*nfft + 1) + 10*nfft * 1045* * 1046* Exit - A is transformed and the imaginary * 1047* part of A is set to zero * 1048* uses - C3dB_c_transpose_jk, dcopy * 1049* * 1050***************************************************** 1051 1052 implicit none 1053 integer nb 1054 complex*16 A(*) 1055 1056#include "bafdecls.fh" 1057#include "C3dB.fh" 1058#include "errquit.fh" 1059 1060 1061 integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS) 1062 common / C3dB_fft / tmpx,tmpy,tmpz 1063 1064 1065* *** local variables *** 1066 integer i,j,q,indx 1067 1068c complex*16 tmp1(*) 1069c complex*16 tmp2(*) 1070c real*8 tmp3(*) 1071 !integer nfft3d 1072 integer tmp1(2),tmp2(2),ierr 1073 logical value 1074 1075 1076 1077 call nwpw_timing_start(1) 1078 1079* ***** allocate temporary space **** 1080 !call C3dB_nfft3d(nb,nfft3d) 1081 value = BA_push_get(mt_dcpl,(nfft3d(nb)), 1082 > 'ffttmp1',tmp1(2),tmp1(1)) 1083 value = value.and. 1084 > BA_push_get(mt_dcpl,(nfft3d(nb)), 1085 > 'ffttmp2',tmp2(2),tmp2(1)) 1086 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 1087 1088 1089 1090 1091 !********************** 1092 !**** slab mapping **** 1093 !********************** 1094 if (mapping.eq.1) then 1095 1096* ******************************************** 1097* *** Do a transpose of A *** 1098* *** A(kx,kz,ky) <- A(kx,ky,kz) *** 1099* ******************************************** 1100c call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dbl_mb(tmp3(1))) 1101 1102* ************************************************* 1103* *** do fft along kz dimension *** 1104* *** A(kx,nz(nb),ky) <- fft1d^(-1)[A(kx,kz,ky)] *** 1105* ************************************************* 1106#ifdef MLIB 1107 !call dcffti(nz(nb),dcpl_mb(tmp1(1))) 1108 do q=1,nq(nb) 1109 do i=1,nx(nb) 1110 indx = i + (q-1)*nx(nb)*nz(nb) 1111 call zcopy(nz(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1) 1112 call z1dfft(dcpl_mb(tmp2(1)),nz(nb), 1113 > dcpl_mb(tmpz(1,nb)),-2,ierr) 1114 call zcopy(nz(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb)) 1115 end do 1116 end do 1117#else 1118 !call dcffti(nz(nb),dcpl_mb(tmp1(1))) 1119 do q=1,nq(nb) 1120 do i=1,nx(nb) 1121 indx = i + (q-1)*nx(nb)*nz(nb) 1122 call zcopy(nz(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1) 1123 call dcfftb(nz(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpz(1,nb))) 1124 call zcopy(nz(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb)) 1125 end do 1126 end do 1127#endif 1128 1129* ******************************************** 1130* *** Do a transpose of A *** 1131* *** A(kx,ky,nz(nb)) <- A(kx,nz(nb),ky) *** 1132* ******************************************** 1133 call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dcpl_mb(tmp1(1))) 1134 1135* ************************************************* 1136* *** do fft along ky dimension *** 1137* *** A(kx,ny(nb),nz(nb)) <- fft1d^(-1)[A(kx,ky,nz(nb))] *** 1138* ************************************************* 1139#ifdef MLIB 1140 do q=1,nq(nb) 1141 do i=1,nx(nb) 1142 indx = i + (q-1)*nx(nb)*ny(nb) 1143 call zcopy(ny(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1) 1144 call z1dfft(dcpl_mb(tmp2(1)),ny(nb), 1145 > dcpl_mb(tmpy(1,nb)),-2,ierr) 1146 call zcopy(ny(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb)) 1147 end do 1148 end do 1149#else 1150 !call dcffti(ny(nb),dcpl_mb(tmp1(1))) 1151 do q=1,nq(nb) 1152 do i=1,nx(nb) 1153 indx = i + (q-1)*nx(nb)*ny(nb) 1154 call zcopy(ny(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1) 1155 call dcfftb(ny(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpy(1,nb))) 1156 call zcopy(ny(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb)) 1157 end do 1158 end do 1159#endif 1160 1161* ************************************************* 1162* *** do fft along kx dimension *** 1163* *** A(nx(nb),ny(nb),nz(nb)) <- fft1d^(-1)[A(kx,ny(nb),nz(nb))] *** 1164* ************************************************* 1165#ifdef MLIB 1166 indx = 1 1167 do q=1,nq(nb) 1168 do j=1,ny(nb) 1169 call z1dfft(A(indx),nx(nb), 1170 > dcpl_mb(tmpx(1,nb)),-2,ierr) 1171 indx = indx + nx(nb) 1172 end do 1173 end do 1174#else 1175 !call dcffti(nx(nb),dcpl_mb(tmp1(1))) 1176 do q=1,nq(nb) 1177 do j=1,ny(nb) 1178 indx = 1 + (j-1)*nx(nb) + (q-1)*nx(nb)*ny(nb) 1179 call zcopy(nx(nb),A(indx),1,dcpl_mb(tmp2(1)),1) 1180 call dcfftb(nx(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpx(1,nb))) 1181 call zcopy(nx(nb),dcpl_mb(tmp2(1)),1,A(indx),1) 1182 end do 1183 end do 1184#endif 1185 !************************* 1186 !**** hilbert mapping **** 1187 !************************* 1188 else 1189 1190* ************************************************* 1191* *** do fft along kz dimension *** 1192* *** A(nz(nb),kx,ky) <- fft1d^(-1)[A(kz,kx,ky)] *** 1193* ************************************************* 1194#ifdef MLIB 1195 indx = 1 1196 do q=1,nq3(nb) 1197 !indx = 1 + (q-1)*nz(nb) 1198 call z1dfft(A(indx),nz(nb),dcpl_mb(tmpz(1,nb)),-2,ierr) 1199 indx = indx + nz(nb) 1200 end do 1201#else 1202 indx = 1 1203 do q=1,nq3(nb) 1204 !indx = 1 + (q-1)*nz(nb) 1205 call dcfftb(nz(nb),A(indx),dcpl_mb(tmpz(1,nb))) 1206 indx = indx + nz(nb) 1207 end do 1208#endif 1209 1210 call C3dB_c_transpose_ijk(nb,3,A,dcpl_mb(tmp1(1)), 1211 > dcpl_mb(tmp2(1))) 1212 1213 1214* ************************************************* 1215* *** do fft along ky dimension *** 1216* *** A(ny(nb),nz(nb),kx) <- fft1d^(-1)[A(ky,nz(nb),kx)] *** 1217* ************************************************* 1218#ifdef MLIB 1219 indx = 1 1220 do q=1,nq2(nb) 1221 !indx = 1 + (q-1)*ny(nb) 1222 call z1dfft(A(indx),ny(nb),dcpl_mb(tmpy(1,nb)),-2,ierr) 1223 indx = indx + ny(nb) 1224 end do 1225#else 1226 indx = 1 1227 do q=1,nq2(nb) 1228 !indx = 1 + (q-1)*ny(nb) 1229 call dcfftb(ny(nb),A(indx),dcpl_mb(tmpy(1,nb))) 1230 indx = indx + ny(nb) 1231 end do 1232#endif 1233 1234 call C3dB_c_transpose_ijk(nb,4,A,dcpl_mb(tmp1(1)), 1235 > dcpl_mb(tmp2(1))) 1236 1237* ************************************************* 1238* *** do fft along kx dimension *** 1239* *** A(nx(nb),ny(nb),nz(nb)) <- fft1d^(-1)[A(kx,ny(nb),nz(nb))] *** 1240* ************************************************* 1241#ifdef MLIB 1242 indx = 1 1243 do q=1,nq1(nb) 1244 !indx = 1 + (q-1)*nx(nb) 1245 call z1dfft(A(indx),nx(nb),dcpl_mb(tmpx(1,nb)),-2,ierr) 1246 indx = indx + nx(nb) 1247 end do 1248#else 1249 indx = 1 1250 do q=1,nq1(nb) 1251 !indx = 1 + (q-1)*ny(nb) 1252 call dcfftb(nx(nb),A(indx),dcpl_mb(tmpx(1,nb))) 1253 indx = indx + nx(nb) 1254 end do 1255#endif 1256 1257 1258 1259 end if 1260 1261* **** deallocate temporary space **** 1262 value = BA_pop_stack(tmp2(2)) 1263 value = BA_pop_stack(tmp1(2)) 1264 1265 call nwpw_timing_end(1) 1266 return 1267 end 1268 1269 1270 1271 1272 1273* *********************************** 1274* * * 1275* * C3dB_rc_fft3f * 1276* * * 1277* *********************************** 1278 1279 subroutine C3dB_rc_fft3f(nb,A) 1280 1281***************************************************** 1282* * 1283* This routine performs the operation of * 1284* a three dimensional complex to complex fft * 1285* A(kx,ky,kz) <- FFT3[A(nx(nb),ny(nb),nz(nb))] * 1286* * 1287* Entry - * 1288* A: a column distribuded 3d block * 1289* tmp: tempory work space must be at * 1290* least the size of (complex) * 1291* (nfft*nfft + 1) + 10*nfft * 1292* * 1293* Exit - A is transformed * 1294* * 1295* uses - transpose1 subroutine * 1296* * 1297***************************************************** 1298 1299 implicit none 1300 integer nb 1301 complex*16 A(*) 1302 1303#include "bafdecls.fh" 1304#include "C3dB.fh" 1305#include "errquit.fh" 1306 1307 1308 integer tmpx(2,NBLOCKS),tmpy(2,NBLOCKS),tmpz(2,NBLOCKS) 1309 common / C3dB_fft / tmpx,tmpy,tmpz 1310 1311 1312* *** local variables *** 1313 integer i,j,q,indx 1314 1315 !integer nfft3d 1316 integer tmp1(2),tmp2(2) 1317 logical value 1318 1319 1320 call nwpw_timing_start(1) 1321 1322* ***** allocate temporary space **** 1323 !call C3dB_nfft3d(nb,nfft3d) 1324 value = BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp1',tmp1(2),tmp1(1)) 1325 value = value.and. 1326 > BA_push_get(mt_dcpl,(nfft3d(nb)),'tmp2',tmp2(2),tmp2(1)) 1327 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 1328 1329 1330 1331 !********************** 1332 !**** slab mapping **** 1333 !********************** 1334 if (mapping.eq.1) then 1335 1336* ******************************************** 1337* *** do fft along nx(nb) dimension *** 1338* *** A(kx,ny(nb),nz(nb)) <- fft1d[A(nx(nb),ny(nb),nz(nb))] *** 1339* ******************************************** 1340 !call dcffti(nx(nb),dcpl_mb(tmp1(1))) 1341 do q=1,nq(nb) 1342 do j=1,ny(nb) 1343 indx = 1 + (j-1)*nx(nb) + (q-1)*nx(nb)*ny(nb) 1344 call zcopy((nx(nb)),A(indx),1,dcpl_mb(tmp2(1)),1) 1345 call dcfftf(nx(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpx(1,nb))) 1346 call zcopy(nx(nb),dcpl_mb(tmp2(1)),1,A(indx),1) 1347 end do 1348 end do 1349 1350* ******************************************** 1351* *** do fft along ny(nb) dimension *** 1352* *** A(kx,ky,nz(nb)) <- fft1d[A(kx,ny(nb),nz(nb))] *** 1353* ******************************************** 1354 !call dcffti(ny(nb),dcpl_mb(tmp1(1))) 1355 do q=1,nq(nb) 1356 do i=1,nx(nb) 1357 indx = i + (q-1)*nx(nb)*ny(nb) 1358 call zcopy(ny(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1) 1359 call dcfftf(ny(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpy(1,nb))) 1360 call zcopy(ny(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb)) 1361 end do 1362 end do 1363 1364 1365* ******************************************** 1366* *** Do a transpose of A *** 1367* *** A(ky,nz(nb),ky) <- A(kx,ky,nz(nb)) *** 1368* ******************************************** 1369 call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dcpl_mb(tmp1(1))) 1370 1371 1372* ******************************************** 1373* *** do fft along nz(nb) dimension *** 1374* *** A(kx,kz,ky) <- fft1d[A(kx,nz(nb),ky)] *** 1375* ******************************************** 1376 !call dcffti(nz(nb),dcpl_mb(tmp1(1))) 1377 do q=1,nq(nb) 1378 do i=1,nx(nb) 1379 indx = i + (q-1)*nx(nb)*ny(nb) 1380 call zcopy(nz(nb),A(indx),nx(nb),dcpl_mb(tmp2(1)),1) 1381 call dcfftf(nz(nb),dcpl_mb(tmp2(1)),dcpl_mb(tmpz(1,nb))) 1382 call zcopy(nz(nb),dcpl_mb(tmp2(1)),1,A(indx),nx(nb)) 1383 end do 1384 end do 1385 1386* ******************************************** 1387* *** Do a transpose of A *** 1388* *** A(kx,ky,kz) <- A(kx,kz,ky) *** 1389* ******************************************** 1390c call C3dB_c_transpose_jk(nb,A,dcpl_mb(tmp2(1)),dcpl_mb(tmp1(1))) 1391 1392 1393 1394 !************************* 1395 !**** hilbert mapping **** 1396 !************************* 1397 else 1398 1399* ******************************************** 1400* *** do fft along nx(nb) dimension *** 1401* *** A(kx,ny(nb),nz(nb)) <- fft1d[A(nx(nb),ny(nb),nz(nb))] *** 1402* ******************************************** 1403#ifdef MLIB 1404 indx = 1 1405 do q=1,nq1(nb) 1406 !indx = 1 + (q-1)*nx(nb) 1407 call z1dfft(A(indx),nx(nb),dcpl_mb(tmpx(1,nb)),1,ierr) 1408 indx = indx + nx(nb) 1409 end do 1410#else 1411 indx = 1 1412 do q=1,nq1(nb) 1413 !indx = 1 + (q-1)*nx(nb) 1414 call dcfftf(nx(nb),A(indx),dcpl_mb(tmpx(1,nb))) 1415 indx = indx + nx(nb) 1416 end do 1417#endif 1418 1419 call C3dB_c_transpose_ijk(nb,1,A,dcpl_mb(tmp1(1)), 1420 > dcpl_mb(tmp2(1))) 1421 1422* ******************************************** 1423* *** do fft along ny(nb) dimension *** 1424* *** A(ky,nz(nb),kx) <- fft1d[A(ny(nb),nz(nb),kx)] *** 1425* ******************************************** 1426#ifdef MLIB 1427 indx = 1 1428 do q=1,nq2(nb) 1429 !indx = 1 + (q-1)*ny(nb) 1430 call z1dfft(A(indx),ny(nb),dcpl_mb(tmpy(1,nb)),1,ierr) 1431 indx = indx + ny(nb) 1432 end do 1433#else 1434 indx = 1 1435 do q=1,nq2(nb) 1436 !indx = 1 + (q-1)*ny(nb) 1437 call dcfftf(ny(nb),A(indx),dcpl_mb(tmpy(1,nb))) 1438 indx = indx + ny(nb) 1439 end do 1440#endif 1441 1442 call C3dB_c_transpose_ijk(nb,2,A,dcpl_mb(tmp1(1)), 1443 > dcpl_mb(tmp2(1))) 1444 1445* ******************************************** 1446* *** do fft along nz(nb) dimension *** 1447* *** A(kz,kx,ky) <- fft1d[A(nz(nb),kx,ky)] *** 1448* ******************************************** 1449#ifdef MLIB 1450 indx = 1 1451 do q=1,nq3(nb) 1452 !indx = 1 + (q-1)*nz(nb) 1453 call z1dfft(A(indx),nz(nb),dcpl_mb(tmpz(1,nb)),1,ierr) 1454 indx = indx + nz(nb) 1455 end do 1456#else 1457 indx = 1 1458 do q=1,nq3(nb) 1459 !indx = 1 + (q-1)*nz(nb) 1460 call dcfftf(nz(nb),A(indx),dcpl_mb(tmpz(1,nb))) 1461 indx = indx + nz(nb) 1462 end do 1463#endif 1464 1465 1466 1467 end if 1468 1469* **** deallocate temporary space **** 1470 value = BA_pop_stack(tmp2(2)) 1471 value = BA_pop_stack(tmp1(2)) 1472 1473 call nwpw_timing_end(1) 1474 return 1475 end 1476 1477 1478 1479 1480 1481* *********************************** 1482* * * 1483* * C3dB_(c,r)_SMul * 1484* * * 1485* *********************************** 1486 1487* This routine performs the operation C = scale * A 1488* where scale is a real*8 number. 1489 1490 subroutine C3dB_c_SMul(nb,scale,A,C) 1491 implicit none 1492 integer nb 1493 real*8 scale 1494 complex*16 A(*) 1495 complex*16 C(*) 1496 1497#include "C3dB.fh" 1498 1499 integer i 1500 1501!$OMP DO 1502 do i=1,nfft3d_map(nb) 1503 C(i) = scale*A(i) 1504 end do 1505!$OMP END DO 1506 return 1507 end 1508 1509 1510 subroutine C3dB_c_SMul1(nb,scale,A) 1511 implicit none 1512 integer nb 1513 real*8 scale 1514 complex*16 A(*) 1515 1516#include "C3dB.fh" 1517 1518 integer i 1519 1520!$OMP DO 1521 do i=1,nfft3d_map(nb) 1522 A(i) = scale*A(i) 1523 end do 1524!$OMP END DO 1525 return 1526 end 1527 1528 1529 subroutine C3dB_b_SMul1(nb,scale,A) 1530 implicit none 1531 integer nb 1532 real*8 scale 1533 complex*16 A(*) 1534#include "C3dB.fh" 1535 integer i 1536 1537!$OMP DO 1538 do i=1,n2ft3d_map(nb) 1539 A(i) = scale*A(i) 1540 end do 1541!$OMP END DO 1542 return 1543 end 1544 1545 1546 1547 subroutine C3dB_r_SMul(nb,scale,A,C) 1548 implicit none 1549 integer nb 1550 real*8 scale 1551 real*8 A(*) 1552 real*8 C(*) 1553 1554#include "C3dB.fh" 1555 1556 integer i 1557 1558!$OMP DO 1559 do i=1,n2ft3d_map(nb) 1560 C(i) = scale*A(i) 1561 end do 1562!$OMP END DO 1563 return 1564 end 1565 1566 subroutine C3dB_r_SMul1(nb,scale,A) 1567 implicit none 1568 integer nb 1569 real*8 scale 1570 real*8 A(*) 1571 1572#include "C3dB.fh" 1573 1574 integer i 1575 1576!$OMP DO 1577 do i=1,n2ft3d_map(nb) 1578 A(i) = scale*A(i) 1579 end do 1580!$OMP END DO 1581 return 1582 end 1583 1584 1585 subroutine C3dB_c_ZMul(nb,scale,A,C) 1586 implicit none 1587 integer nb 1588 complex*16 scale 1589 complex*16 A(*) 1590 complex*16 C(*) 1591 1592#include "C3dB.fh" 1593 1594 integer i 1595 1596!$OMP DO 1597 do i=1,nfft3d_map(nb) 1598 C(i) = scale*A(i) 1599 end do 1600!$OMP END DO 1601 return 1602 end 1603 1604 1605* *********************************** 1606* * * 1607* * C3dB_rc_SMul * 1608* * * 1609* *********************************** 1610 1611* This routine performs the operation C = scale * A 1612* where scale and A are real*8 numbers. 1613 1614 subroutine C3dB_rc_SMul(nb,scale,A,C) 1615 implicit none 1616 integer nb 1617 real*8 scale 1618 real*8 A(*) 1619 complex*16 C(*) 1620 1621#include "C3dB.fh" 1622 1623 integer i 1624 1625!$OMP DO 1626 do i=1,n2ft3d_map(nb) 1627 C(i) = dcmplx(scale*A(i),0.0d0) 1628 end do 1629!$OMP END DO 1630 return 1631 end 1632 1633* *********************************** 1634* * * 1635* * C3dB_cr_aSqrpy * 1636* * * 1637* *********************************** 1638 1639* This routine performs the operation C = C + w*A * A 1640 1641 subroutine C3dB_cr_aSqrpy(nb,w,A,C) 1642 implicit none 1643 integer nb 1644 real*8 w 1645 complex*16 A(*) 1646 real*8 C(*),ar,ai 1647 1648#include "C3dB.fh" 1649 1650 integer i 1651 1652!$OMP DO 1653 do i=1,n2ft3d_map(nb) 1654 ar=dble(A(i)) 1655 ai=dimag(A(i)) 1656 C(i) = C(i) + w*(ar*ar+ai*ai) 1657 end do 1658!$OMP END DO 1659 return 1660 end 1661 1662 1663 1664 1665* *********************************** 1666* * * 1667* * C3dB_cr_Sqr * 1668* * * 1669* *********************************** 1670 1671* This routine performs the operation C = A * A 1672 1673 subroutine C3dB_cr_Sqr(nb,A,C) 1674 implicit none 1675 integer nb 1676 complex*16 A(*) 1677 real*8 C(*),ar,ai 1678 1679#include "C3dB.fh" 1680 1681 integer i 1682 1683!$OMP DO 1684 do i=1,n2ft3d_map(nb) 1685 ar=dble(A(i)) 1686 ai=dimag(A(i)) 1687 C(i) = ar*ar + ai*ai 1688 end do 1689!$OMP END DO 1690 return 1691 end 1692 1693 1694* *********************************** 1695* * * 1696* * C3dB_cc_absSqrt1 * 1697* * * 1698* *********************************** 1699 1700* This routine performs the operation A = sqrt(abs(A)) 1701 1702 subroutine C3dB_cc_absSqrt1(nb,A) 1703 implicit none 1704 integer nb 1705 complex*16 A(*) 1706 real*8 ar,ai 1707 1708#include "C3dB.fh" 1709 1710 integer i 1711 1712!$OMP DO 1713 do i=1,n2ft3d_map(nb) 1714 ar=dble(A(i)) 1715 !ai=dimag(A(i)) 1716 !A(i) = dcmplx(dsqrt(dsqrt(ar*ar + ai*ai)),0.0d0) 1717 A(i) = dcmplx(dsqrt(dabs(ar)),0.0d0) 1718 end do 1719!$OMP END DO 1720 return 1721 end 1722 1723 1724 1725* *********************************** 1726* * * 1727* * C3dB_cr_real * 1728* * * 1729* *********************************** 1730 1731* This routine performs the operation C = real(A) 1732 1733 subroutine C3dB_cr_real(nb,A,C) 1734 implicit none 1735 integer nb 1736 complex*16 A(*) 1737 real*8 C(*) 1738 1739#include "C3dB.fh" 1740 1741 integer i 1742 1743!$OMP DO 1744 do i=1,n2ft3d_map(nb) 1745 C(i) = dble(A(i)) 1746 end do 1747!$OMP END DO 1748 return 1749 end 1750 1751 1752 1753* *********************************** 1754* * * 1755* * C3dB_cr_imag * 1756* * * 1757* *********************************** 1758 1759* This routine performs the operation C = imag(A) 1760 1761 subroutine C3dB_cr_imag(nb,A,C) 1762 implicit none 1763 integer nb 1764 complex*16 A(*) 1765 real*8 C(*) 1766 1767#include "C3dB.fh" 1768 1769 integer i 1770 1771!$OMP DO 1772 do i=1,n2ft3d_map(nb) 1773 C(i) = dimag(A(i)) 1774 end do 1775!$OMP END DO 1776 return 1777 end 1778 1779 1780 1781 1782* *********************************** 1783* * * 1784* * C3dB_ccr_Mul * 1785* * * 1786* *********************************** 1787 1788* This routine performs the operation C = dble(A * B) 1789 1790 subroutine C3dB_ccr_Mul(nb,A,B,C) 1791 implicit none 1792 integer nb 1793 complex*16 A(*) 1794 complex*16 B(*) 1795 real*8 C(*) 1796 1797#include "C3dB.fh" 1798 1799 integer i 1800 1801!$OMP DO 1802 do i=1,n2ft3d_map(nb) 1803 C(i) = dble(A(i))*dble(B(i)) + dimag(A(i))*dimag(B(i)) 1804 end do 1805!$OMP END DO 1806 return 1807 end 1808 1809 1810 1811* *********************************** 1812* * * 1813* * C3dB_rr_Sqr * 1814* * * 1815* *********************************** 1816 1817 subroutine C3dB_rr_Sqr(nb,A,C) 1818 implicit none 1819 integer nb 1820 real*8 A(*) 1821 real*8 C(*) 1822 1823#include "C3dB.fh" 1824 1825 integer i 1826 1827!$OMP DO 1828 do i=1,n2ft3d_map(nb) 1829 C(i) = A(i)**2 1830 end do 1831!$OMP END DO 1832 return 1833 end 1834 1835* *********************************** 1836* * * 1837* * C3dB_rr_Sqrt * 1838* * * 1839* *********************************** 1840 1841 subroutine C3dB_rr_Sqrt(nb,A,C) 1842 implicit none 1843 integer nb 1844 real*8 A(*) 1845 real*8 C(*) 1846 1847#include "C3dB.fh" 1848 1849 integer i 1850 1851!$OMP DO 1852 do i=1,n2ft3d_map(nb) 1853 C(i) = dsqrt(A(i)) 1854 end do 1855!$OMP END DO 1856 return 1857 end 1858 1859 1860 1861 1862* *********************************** 1863* * * 1864* * C3dB_c_transpose_jk_init * 1865* * * 1866* *********************************** 1867 1868 subroutine C3dB_c_transpose_jk_init(nb) 1869 implicit none 1870 integer nb 1871 1872#include "bafdecls.fh" 1873#include "errquit.fh" 1874#include "C3dB.fh" 1875 1876c integer iq_to_i1((NFFT1/2+1)*NFFT2*NSLABS) 1877c integer iq_to_i2((NFFT1/2+1)*NFFT2*NSLABS) 1878c integer i1_start(NFFT3+1) 1879c integer i2_start(NFFT3+1) 1880 integer iq_to_i1(2,NBLOCKS) 1881 integer iq_to_i2(2,NBLOCKS) 1882 integer i1_start(2,NBLOCKS) 1883 integer i2_start(2,NBLOCKS) 1884 common / c_trans_blk / iq_to_i1,iq_to_i2,i1_start,i2_start 1885 1886 1887 1888* **** local variables **** 1889 integer proc_to,proc_from 1890 integer pto,qto,np,taskid 1891 integer pfrom,qfrom 1892 integer phere,qhere 1893 integer index1,index2,itmp 1894 integer i,j,k,it 1895 logical value 1896 1897* **** external functions **** 1898 1899* **** allocate c_trans_blk common block **** 1900 value = BA_alloc_get(mt_int,(nx(nb)*ny(nb)*nq(nb)), 1901 > 'iq_to_i1',iq_to_i1(2,nb),iq_to_i1(1,nb)) 1902 value = BA_alloc_get(mt_int,(nx(nb)*ny(nb)*nq(nb)), 1903 > 'iq_to_i2',iq_to_i2(2,nb),iq_to_i2(1,nb)) 1904 1905 value = BA_alloc_get(mt_int,(nz(nb)+1), 1906 > 'i1_start',i1_start(2,nb),i1_start(1,nb)) 1907 value = BA_alloc_get(mt_int,(nz(nb)+1), 1908 > 'i2_start',i2_start(2,nb),i2_start(1,nb)) 1909 1910 call Parallel3d_taskid_i(taskid) 1911 call Parallel3d_np_i(np) 1912 1913 index1 = 1 1914 index2 = 1 1915 do it=0,np-1 1916 proc_to = mod(taskid+it,np) 1917 proc_from = mod(taskid-it+np,np) 1918 int_mb(i1_start(1,nb)+it) = index1 1919 int_mb(i2_start(1,nb)+it) = index2 1920 1921 do k=1,nz(nb) 1922 do j=1,ny(nb) 1923 1924* **** packing scheme **** 1925 call C3dB_ktoqp(nb,k,qhere,phere) 1926 call C3dB_ktoqp(nb,j,qto,pto) 1927 if ((phere.eq.taskid).and.(pto.eq.proc_to)) then 1928 do i=1,nx(nb) 1929 itmp = i + (j-1)*nx(nb) 1930 > + (qhere-1)*nx(nb)*ny(nb) 1931 int_mb(iq_to_i1(1,nb)+itmp-1) = index1 1932 index1 = index1 + 1 1933 end do 1934 end if 1935 1936* **** unpacking scheme **** 1937 call C3dB_ktoqp(nb,j,qhere,phere) 1938 call C3dB_ktoqp(nb,k,qfrom,pfrom) 1939 if ((phere.eq.taskid).and.(pfrom.eq.proc_from)) then 1940 do i=1,nx(nb) 1941 itmp = i + (k-1)*nx(nb) 1942 > + (qhere-1)*nx(nb)*ny(nb) 1943 int_mb(iq_to_i2(1,nb)+itmp-1) = index2 1944 index2 = index2 + 1 1945 end do 1946 end if 1947 end do 1948 end do 1949 end do 1950 int_mb(i1_start(1,nb)+np) = index1 1951 int_mb(i2_start(1,nb)+np) = index2 1952 1953 return 1954 end 1955 1956 1957* ******************************************* 1958* * * 1959* * C3dB_r_FormatWrite_reverse * 1960* * * 1961* ******************************************* 1962 1963 subroutine C3dB_r_FormatWrite_reverse(nb,iunit,A,tmp) 1964 implicit none 1965 integer nb 1966 integer iunit 1967 real*8 A(*) 1968 real*8 tmp(*) 1969 1970#include "C3dB.fh" 1971#include "tcgmsg.fh" 1972#include "msgtypesf.h" 1973 1974 integer rcv_len,rcv_proc 1975 1976 1977* *** local variables *** 1978 integer MASTER,taskid 1979 parameter(MASTER=0) 1980 integer p_from, p_here,q 1981 integer i,j,k,index 1982 integer dest,source,status,msglen,idum 1983 1984 call Parallel3d_taskid_i(taskid) 1985 1986 !********************** 1987 !**** slab mapping **** 1988 !********************** 1989 if (mapping.eq.1) then 1990 1991* **** master node reads from file and distributes **** 1992 if (taskid.eq.MASTER) then 1993 do i=1,nx(nb) 1994 do j=1,ny(nb) 1995 1996 do k=1,nz(nb) 1997 call C3dB_ktoqp(nb,k,q,p_from) 1998 if (p_from.eq.MASTER) then 1999 index = (q-1)*(nx(nb))*ny(nb) 2000 > + (j-1)*(nx(nb)) + i 2001 tmp(k) = A(index) 2002 else 2003 msglen = 1 2004 status = msglen 2005 source = p_from 2006 idum = -999 2007 2008 call SND(9+MSGINT,idum,mitob(msglen),source,1) 2009 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len, 2010 > source,rcv_proc,1) 2011 2012 end if 2013 end do 2014 write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb)) 2015 2016 end do 2017 end do 2018 2019* **** not master node **** 2020 else 2021 do i=1,nx(nb) 2022 do j=1,ny(nb) 2023 2024 do k=1,nz(nb) 2025 call C3dB_ktoqp(nb,k,q,p_here) 2026 if (p_here.eq.taskid) then 2027 2028 index = (q-1)*(nx(nb))*ny(nb) 2029 > + (j-1)*(nx(nb)) + i 2030 tmp(1) = A(index) 2031 2032 msglen = 1 2033 dest = MASTER 2034 2035 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len, 2036 > dest,rcv_proc,1) 2037 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 2038 2039 end if 2040 end do 2041 2042 end do 2043 end do 2044 end if 2045 2046 !************************* 2047 !**** hilbert mapping **** 2048 !************************* 2049 else 2050 2051 2052* **** master node reads from file and distributes **** 2053 if (taskid.eq.MASTER) then 2054 do i=1,nx(nb) 2055 do j=1,ny(nb) 2056 2057 do k=1,nz(nb) 2058 call C3dB_ijktoindex2p(nb,i,j,k,index,p_from) 2059 if (p_from.eq.MASTER) then 2060 tmp(k) = A(index) 2061 else 2062 msglen = 1 2063 status = msglen 2064 source = p_from 2065 idum = -999 2066 2067 call SND(9+MSGINT,idum,mitob(msglen),source,1) 2068 call RCV(9+MSGDBL,tmp(k),mdtob(msglen),rcv_len, 2069 > source,rcv_proc,1) 2070 end if 2071 end do 2072 write(iunit,'(6E13.5)') (tmp(k), k=1,nz(nb)) 2073 2074 end do 2075 end do 2076 2077* **** not master node **** 2078 else 2079 do i=1,nx(nb) 2080 do j=1,ny(nb) 2081 2082 do k=1,nz(nb) 2083 call C3dB_ijktoindex2p(nb,i,j,k,index,p_here) 2084 2085 if (p_here.eq.taskid) then 2086 2087 tmp(1) = A(index) 2088 2089 msglen = 1 2090 dest = MASTER 2091 rcv_proc = MASTER 2092 rcv_len = mitob(1) 2093 call RCV(9+MSGINT,idum,mitob(msglen),rcv_len, 2094 > dest,rcv_proc,1) 2095 call SND(9+MSGDBL,tmp,mdtob(msglen),dest,1) 2096 end if 2097 end do 2098 2099 end do 2100 end do 2101 end if 2102 2103 end if 2104 2105* **** wait **** 2106 return 2107 end 2108 2109 2110 2111* *********************************** 2112* * * 2113* * C3dB_cc_dot * 2114* * * 2115* *********************************** 2116 2117 subroutine C3dB_cc_dot(nb,A,B,sumall) 2118 implicit none 2119 integer nb 2120 real*8 A(*) 2121 real*8 B(*) 2122 real*8 sumall 2123 2124#include "C3dB.fh" 2125 2126 integer np 2127 real*8 sum 2128 2129 2130* **** external functions **** 2131 real*8 ddot 2132 external ddot 2133 2134 call nwpw_timing_start(2) 2135 2136 call Parallel3d_np_i(np) 2137 2138* **** sum up dot product on this node **** 2139 sum = ddot(nfft3d_map(nb),A(1),2,B(1),2) 2140 > + ddot(nfft3d_map(nb),A(2),2,B(2),2) 2141 2142 2143 2144* **** add up sums from other nodes **** 2145 if (np.gt.1) then 2146 call C3dB_SumAll(sum) 2147 end if 2148 2149 call nwpw_timing_end(2) 2150 2151 sumall = sum 2152 return 2153 end 2154 2155* *********************************** 2156* * * 2157* * C3dB_cc_idot * 2158* * * 2159* *********************************** 2160 2161 subroutine C3dB_cc_idot(nb,A,B,sumall) 2162 implicit none 2163 integer nb 2164 real*8 A(*) 2165 real*8 B(*) 2166 real*8 sumall 2167 2168#include "C3dB.fh" 2169 2170 real*8 sum 2171 2172 2173 real*8 ddot 2174 external ddot 2175 2176 call nwpw_timing_start(2) 2177 2178* **** sum up dot product on this node **** 2179 sum = ddot(nfft3d_map(nb),A(1),2,B(1),2) 2180 > + ddot(nfft3d_map(nb),A(2),2,B(2),2) 2181 2182* **** do not add up sums from other nodes **** 2183 call nwpw_timing_end(2) 2184 2185 sumall = sum 2186 return 2187 end 2188 2189 2190 subroutine C3dB_bb_idot(nb,A,B,sumall) 2191 implicit none 2192 integer nb 2193 real*8 A(*) 2194 real*8 B(*) 2195 real*8 sumall 2196 2197#include "C3dB.fh" 2198 2199 real*8 sum 2200 2201 2202 real*8 ddot 2203 external ddot 2204 2205 call nwpw_timing_start(2) 2206 2207* **** sum up dot product on this node **** 2208 sum = ddot(n2ft3d_map(nb),A(1),2,B(1),2) 2209 > + ddot(n2ft3d_map(nb),A(2),2,B(2),2) 2210 2211* **** do not add up sums from other nodes **** 2212 call nwpw_timing_end(2) 2213 2214 sumall = sum 2215 return 2216 end 2217 2218 2219 2220* *********************************** 2221* * * 2222* * C3dB_rr_dot * 2223* * * 2224* *********************************** 2225 2226 subroutine C3dB_rr_dot(nb,A,B,sumall) 2227 implicit none 2228 integer nb 2229 real*8 A(*) 2230 real*8 B(*) 2231 real*8 sumall 2232 2233#include "C3dB.fh" 2234 2235 integer np 2236 real*8 sum 2237 2238 real*8 ddot 2239 external ddot 2240 2241 call Parallel3d_np_i(np) 2242 2243* **** sum up dot product on this node **** 2244 sum = ddot(n2ft3d_map(nb),A,1,B,1) 2245 2246* **** add up sums from other nodes **** 2247 if (np.gt.1) then 2248 call C3dB_SumAll(sum) 2249 end if 2250 2251 sumall = sum 2252 return 2253 end 2254 2255 2256* *********************************** 2257* * * 2258* * C3dB_tt_dot * 2259* * * 2260* *********************************** 2261 2262 subroutine C3dB_tt_dot(nb,A,B,sumall) 2263 implicit none 2264 integer nb 2265 real*8 A(*) 2266 real*8 B(*) 2267 real*8 sumall 2268 2269#include "C3dB.fh" 2270 2271 integer np 2272 real*8 sum 2273 2274 real*8 ddot 2275 external ddot 2276 2277 call Parallel3d_np_i(np) 2278 2279* **** sum up dot product on this node **** 2280 sum = ddot(nfft3d_map(nb),A,1,B,1) 2281 2282* **** add up sums from other nodes **** 2283 if (np.gt.1) then 2284 call C3dB_SumAll(sum) 2285 end if 2286 2287 sumall = sum 2288 return 2289 end 2290 2291* *********************************** 2292* * * 2293* * C3dB_tt_idot * 2294* * * 2295* *********************************** 2296 2297 subroutine C3dB_tt_idot(nb,A,B,sumall) 2298 implicit none 2299 integer nb 2300 real*8 A(*) 2301 real*8 B(*) 2302 real*8 sumall 2303 2304#include "C3dB.fh" 2305 2306 integer np 2307 real*8 sum 2308 2309 real*8 ddot 2310 external ddot 2311 2312 call Parallel3d_np_i(np) 2313 2314* **** sum up dot product on this node **** 2315 sum = ddot(nfft3d_map(nb),A,1,B,1) 2316 2317c* **** add up sums from other nodes **** 2318c if (np.gt.1) then 2319c call C3dB_SumAll(sum) 2320c end if 2321 2322 sumall = sum 2323 return 2324 end 2325 2326 2327* *********************************** 2328* * * 2329* * C3dB_rr_idot * 2330* * * 2331* *********************************** 2332 2333 subroutine C3dB_rr_idot(nb,A,B,sumall) 2334 implicit none 2335 integer nb 2336 real*8 A(*) 2337 real*8 B(*) 2338 real*8 sumall 2339 2340#include "C3dB.fh" 2341 2342 integer np 2343 real*8 sum 2344 2345 real*8 ddot 2346 external ddot 2347 2348c call Parallel3d_np_i(np) 2349 2350* **** sum up dot product on this node **** 2351 sum = ddot(n2ft3d_map(nb),A,1,B,1) 2352 2353* **** add up sums from other nodes **** 2354* if (np.gt.1) then 2355* call C3dB_SumAll(sum) 2356* end if 2357 2358 sumall = sum 2359 return 2360 end 2361 2362 2363 2364* *********************************** 2365* * * 2366* * C3dB_cc_Mul * 2367* * * 2368* *********************************** 2369 2370 subroutine C3dB_cc_Mul(nb,A,B,C) 2371 implicit none 2372 integer nb 2373 complex*16 A(*) 2374 complex*16 B(*) 2375 complex*16 C(*) 2376 2377#include "C3dB.fh" 2378 2379 integer i 2380 2381!$OMP DO 2382 do i=1,nfft3d_map(nb) 2383 C(i) = dconjg(A(i)) * B(i) 2384 end do 2385!$OMP END DO 2386 2387 return 2388 end 2389 2390 2391 2392 subroutine C3dB_bb_Mul(nb,A,B,C) 2393 implicit none 2394 integer nb 2395 complex*16 A(*) 2396 complex*16 B(*) 2397 complex*16 C(*) 2398#include "C3dB.fh" 2399 integer i 2400!$OMP DO 2401 do i=1,n2ft3d_map(nb) 2402 C(i) = dconjg(A(i)) * B(i) 2403 end do 2404!$OMP END DO 2405 return 2406 end 2407 2408 2409 subroutine C3dB_bb_ncMul(nb,A,B,C) 2410 implicit none 2411 integer nb 2412 complex*16 A(*) 2413 complex*16 B(*) 2414 complex*16 C(*) 2415#include "C3dB.fh" 2416 integer i 2417!$OMP DO 2418 do i=1,n2ft3d_map(nb) 2419 C(i) = A(i) * B(i) 2420 end do 2421!$OMP END DO 2422 return 2423 end 2424 2425 2426 2427* *********************************** 2428* * * 2429* * C3dB_cc_Mul2 * 2430* * * 2431* *********************************** 2432 2433 subroutine C3dB_cc_Mul2(nb,A,B) 2434 implicit none 2435 integer nb 2436 complex*16 A(*) 2437 complex*16 B(*) 2438 2439#include "C3dB.fh" 2440 2441 integer i 2442 2443!$OMP DO 2444 do i=1,nfft3d_map(nb) 2445 B(i) = dconjg(A(i)) * B(i) 2446 end do 2447!$OMP END DO 2448 2449 return 2450 end 2451 2452 2453* *********************************** 2454* * * 2455* * C3dB_cc_Mul2c * 2456* * * 2457* *********************************** 2458 2459 subroutine C3dB_cc_Mul2c(nb,A,B) 2460 implicit none 2461 integer nb 2462 complex*16 A(*) 2463 complex*16 B(*) 2464 2465#include "C3dB.fh" 2466 2467 integer i 2468 2469!$OMP DO 2470 do i=1,nfft3d_map(nb) 2471 B(i) = dconjg(B(i)) * A(i) 2472 end do 2473!$OMP END DO 2474 2475 return 2476 end 2477 2478 2479 2480 2481 subroutine C3dB_bb_Mul2c(nb,A,B) 2482 implicit none 2483 integer nb 2484 complex*16 A(*) 2485 complex*16 B(*) 2486 2487#include "C3dB.fh" 2488 2489 integer i 2490 2491!$OMP DO 2492 do i=1,n2ft3d_map(nb) 2493 B(i) = dconjg(B(i)) * A(i) 2494 end do 2495!$OMP END DO 2496 2497 return 2498 end 2499 2500 2501* *********************************** 2502* * * 2503* * C3dB_lc_Mask * 2504* * * 2505* *********************************** 2506 2507 subroutine C3dB_lc_Mask(nb,masker,A) 2508 implicit none 2509 integer nb 2510 logical masker(*) 2511 complex*16 A(*) 2512 2513#include "C3dB.fh" 2514 2515 integer i 2516 2517!$OMP DO 2518 do i=1,nfft3d_map(nb) 2519 if (masker(i)) A(i) = dcmplx(0.0d0,0.0d0) 2520 end do 2521!$OMP END DO 2522 return 2523 end 2524 2525* *********************************** 2526* * * 2527* * C3dB_lr_Mask * 2528* * * 2529* *********************************** 2530 2531 subroutine C3dB_lr_Mask(nb,masker,A) 2532 implicit none 2533 integer nb 2534 logical masker(*) 2535 real*8 A(*) 2536 2537#include "C3dB.fh" 2538 2539 integer i 2540 2541!$OMP DO 2542 do i=1,nfft3d_map(nb) 2543 if (masker(i)) A(i) = 0.0d0 2544 end do 2545!$OMP END DO 2546 return 2547 end 2548 2549 2550* *********************************** 2551* * * 2552* * C3dB_rc_Mul * 2553* * * 2554* *********************************** 2555 2556 subroutine C3dB_rc_Mul(nb,A,B,C) 2557 implicit none 2558 integer nb 2559 real*8 A(*) 2560 complex*16 B(*) 2561 complex*16 C(*) 2562 2563#include "C3dB.fh" 2564 2565 integer i 2566 2567!$OMP DO 2568 do i=1,nfft3d_map(nb) 2569 C(i) = A(i) * B(i) 2570 end do 2571!$OMP END DO 2572 2573 return 2574 end 2575 2576* *********************************** 2577* * * 2578* * C3dB_rr_Mul * 2579* * * 2580* *********************************** 2581 2582 subroutine C3dB_rr_Mul(nb,A,B,C) 2583 implicit none 2584 integer nb 2585 real*8 A(*) 2586 real*8 B(*) 2587 real*8 C(*) 2588 2589#include "C3dB.fh" 2590 2591 integer i 2592 2593!$OMP DO 2594 do i=1,n2ft3d_map(nb) 2595 C(i) = A(i) * B(i) 2596 end do 2597!$OMP END DO 2598 2599 return 2600 end 2601 2602* *********************************** 2603* * * 2604* * C3dB_rr_Mul2 * 2605* * * 2606* *********************************** 2607 2608 subroutine C3dB_rr_Mul2(nb,A,B) 2609 implicit none 2610 integer nb 2611 real*8 A(*) 2612 real*8 B(*) 2613 2614#include "C3dB.fh" 2615 2616 integer i 2617 2618!$OMP DO 2619 do i=1,n2ft3d_map(nb) 2620 B(i) = B(i) * A(i) 2621 end do 2622!$OMP END DO 2623 2624 return 2625 end 2626 2627 2628 2629* *********************************** 2630* * * 2631* * C3dB_cc_Sum * 2632* * * 2633* *********************************** 2634 2635 subroutine C3dB_cc_Sum(nb,A,B,C) 2636 implicit none 2637 integer nb 2638 complex*16 A(*) 2639 complex*16 B(*) 2640 complex*16 C(*) 2641 2642#include "C3dB.fh" 2643 2644 integer i 2645 2646!$OMP DO 2647 do i=1,nfft3d_map(nb) 2648 C(i) = A(i) + B(i) 2649 end do 2650!$OMP END DO 2651 2652 return 2653 end 2654 2655 2656* *********************************** 2657* * * 2658* * C3dB_cc_Sum2 * 2659* * * 2660* *********************************** 2661 2662 subroutine C3dB_cc_Sum2(nb,A,B) 2663 implicit none 2664 integer nb 2665 complex*16 A(*) 2666 complex*16 B(*) 2667 2668#include "C3dB.fh" 2669 2670 integer i 2671 2672!$OMP DO 2673 do i=1,nfft3d_map(nb) 2674 B(i) = B(i) + A(i) 2675 end do 2676!$OMP END DO 2677 2678 return 2679 end 2680 2681 2682 subroutine C3dB_bb_Sum2(nb,A,B) 2683 implicit none 2684 integer nb 2685 complex*16 A(*) 2686 complex*16 B(*) 2687 2688#include "C3dB.fh" 2689 2690 integer i 2691 2692!$OMP DO 2693 do i=1,n2ft3d_map(nb) 2694 B(i) = B(i) + A(i) 2695 end do 2696!$OMP END DO 2697 2698 return 2699 end 2700 2701 2702 2703* *********************************** 2704* * * 2705* * C3dB_rc_Sum * 2706* * * 2707* *********************************** 2708 2709 subroutine C3dB_rc_Sum(nb,A,B,C) 2710 implicit none 2711 integer nb 2712 real*8 A(*) 2713 complex*16 B(*) 2714 complex*16 C(*) 2715 2716#include "C3dB.fh" 2717 2718 integer i 2719 2720!$OMP DO 2721 do i=1,n2ft3d_map(nb) 2722 C(i) = A(i) + B(i) 2723 end do 2724!$OMP END DO 2725 2726 return 2727 end 2728 2729* *********************************** 2730* * * 2731* * C3dB_rc_Sum2 * 2732* * * 2733* *********************************** 2734 2735 subroutine C3dB_rc_Sum2(nb,A,B) 2736 implicit none 2737 integer nb 2738 real*8 A(*) 2739 complex*16 B(*) 2740 2741#include "C3dB.fh" 2742 2743 integer i 2744 2745!$OMP DO 2746 do i=1,n2ft3d_map(nb) 2747 B(i) = B(i) + A(i) 2748 end do 2749!$OMP END DO 2750 2751 return 2752 end 2753 2754 2755 2756 2757* *********************************** 2758* * * 2759* * C3dB_rr_Sum * 2760* * * 2761* *********************************** 2762 2763 subroutine C3dB_rr_Sum(nb,A,B,C) 2764 implicit none 2765 integer nb 2766 real*8 A(*) 2767 real*8 B(*) 2768 real*8 C(*) 2769 2770#include "C3dB.fh" 2771 2772 integer i 2773 2774!$OMP DO 2775 do i=1,n2ft3d_map(nb) 2776 C(i) = A(i) + B(i) 2777 end do 2778!$OMP END DO 2779 2780 return 2781 end 2782 2783 2784* *********************************** 2785* * * 2786* * C3dB_rr_Sum2 * 2787* * * 2788* *********************************** 2789 2790 subroutine C3dB_rr_Sum2(nb,A,B) 2791 implicit none 2792 integer nb 2793 real*8 A(*) 2794 real*8 B(*) 2795 2796#include "C3dB.fh" 2797 2798 integer i 2799 2800!$OMP DO 2801 do i=1,n2ft3d_map(nb) 2802 B(i) = B(i) + A(i) 2803 end do 2804!$OMP END DO 2805 2806 return 2807 end 2808 2809 2810 2811 2812* *********************************** 2813* * * 2814* * C3dB_rrc_Sum * 2815* * * 2816* *********************************** 2817 2818 subroutine C3dB_rrc_Sum(nb,A,B,C) 2819 implicit none 2820 integer nb 2821 real*8 A(*) 2822 real*8 B(*) 2823 complex*16 C(*) 2824 2825#include "C3dB.fh" 2826 2827 integer i 2828 2829!$OMP DO 2830 do i=1,n2ft3d_map(nb) 2831 C(i) = dcmplx((A(i) + B(i)),0.0d0) 2832 end do 2833!$OMP END DO 2834 2835 return 2836 end 2837 2838 2839* *********************************** 2840* * * 2841* * C3dB_cc_Sub2 * 2842* * * 2843* *********************************** 2844 2845 subroutine C3dB_cc_Sub2(nb,B,C) 2846 implicit none 2847 integer nb 2848 complex*16 B(*) 2849 complex*16 C(*) 2850 2851#include "C3dB.fh" 2852 2853 integer i 2854 2855 do i=1,nfft3d_map(nb) 2856 C(i) = C(i) - B(i) 2857 end do 2858 2859 return 2860 end 2861 2862 subroutine C3dB_bb_Sub2(nb,B,C) 2863 implicit none 2864 integer nb 2865 complex*16 B(*) 2866 complex*16 C(*) 2867 2868#include "C3dB.fh" 2869 2870 integer i 2871 2872!$OMP DO 2873 do i=1,n2ft3d_map(nb) 2874 C(i) = C(i) - B(i) 2875 end do 2876!$OMP END DO 2877 return 2878 end 2879 2880 2881 2882 2883* *********************************** 2884* * * 2885* * C3dB_cc_Sub * 2886* * * 2887* *********************************** 2888 2889 subroutine C3dB_cc_Sub(nb,A,B,C) 2890 implicit none 2891 integer nb 2892 complex*16 A(*) 2893 complex*16 B(*) 2894 complex*16 C(*) 2895 2896#include "C3dB.fh" 2897 2898 integer i 2899 2900!$OMP DO 2901 do i=1,nfft3d_map(nb) 2902 C(i) = A(i) - B(i) 2903 end do 2904!$OMP END DO 2905 2906 return 2907 end 2908 2909 2910* *********************************** 2911* * * 2912* * C3dB_rr_Sub * 2913* * * 2914* *********************************** 2915 2916 subroutine C3dB_rr_Sub(nb,A,B,C) 2917 implicit none 2918 integer nb 2919 real*8 A(*) 2920 real*8 B(*) 2921 real*8 C(*) 2922 2923#include "C3dB.fh" 2924 2925 integer i 2926 2927!$OMP DO 2928 do i=1,n2ft3d_map(nb) 2929 C(i) = A(i) - B(i) 2930 end do 2931!$OMP END DO 2932 2933 return 2934 end 2935 2936 2937 2938 2939* *********************************** 2940* * * 2941* * C3dB_cc_zaxpy * 2942* * * 2943* *********************************** 2944 2945 subroutine C3dB_cc_zaxpy(nb,alpha,A,B) 2946 implicit none 2947 integer nb 2948 complex*16 alpha 2949 complex*16 A(*) 2950 complex*16 B(*) 2951 2952#include "C3dB.fh" 2953 2954 integer i 2955 2956!$OMP DO 2957 do i=1,nfft3d_map(nb) 2958 B(i) = B(i) + alpha*A(i) 2959 end do 2960!$OMP END DO 2961 2962 return 2963 end 2964 2965 2966 2967* *********************************** 2968* * * 2969* * C3dB_cc_daxpy * 2970* * * 2971* *********************************** 2972 2973 subroutine C3dB_cc_daxpy(nb,alpha,A,B) 2974 implicit none 2975 integer nb 2976 real*8 alpha 2977 complex*16 A(*) 2978 complex*16 B(*) 2979 2980#include "C3dB.fh" 2981 2982 integer i 2983 2984!$OMP DO 2985 do i=1,nfft3d_map(nb) 2986 B(i) = B(i) + alpha*A(i) 2987 end do 2988!$OMP END DO 2989 2990 return 2991 end 2992 2993* *********************************** 2994* * * 2995* * C3dB_rr_daxpy * 2996* * * 2997* *********************************** 2998 2999 subroutine C3dB_rr_daxpy(nb,alpha,A,B) 3000 implicit none 3001 integer nb 3002 real*8 alpha 3003 real*8 A(*) 3004 real*8 B(*) 3005 3006#include "C3dB.fh" 3007 3008 integer i 3009 3010!$OMP DO 3011 do i=1,n2ft3d_map(nb) 3012 B(i) = B(i) + alpha* A(i) 3013 end do 3014!$OMP END DO 3015 3016 return 3017 end 3018 3019* *********************************** 3020* * * 3021* * C3dB_rr_Divide * 3022* * * 3023* *********************************** 3024 3025 subroutine C3dB_rr_Divide(nb,A,B,C) 3026 implicit none 3027 integer nb 3028 real*8 A(*) 3029 real*8 B(*) 3030 real*8 C(*) 3031 3032#include "C3dB.fh" 3033 3034 real*8 eta 3035 parameter (eta=1.0d-9) 3036 3037 integer i 3038 3039 3040!$OMP DO 3041 do i=1,n2ft3d_map(nb) 3042 if (dabs(B(i)) .le. eta) then 3043 C(i) = 0.0d0 3044 else 3045 C(i) = A(i) / B(i) 3046 end if 3047 end do 3048!$OMP END DO 3049 3050 return 3051 end 3052 3053 3054* *********************************** 3055* * * 3056* * C3dB_cc_Divide2 * 3057* * * 3058* *********************************** 3059 3060 subroutine C3dB_cc_Divide2(nb,A,B) 3061 implicit none 3062 integer nb 3063 complex*16 A(*) 3064 complex*16 B(*) 3065 3066#include "C3dB.fh" 3067 3068 real*8 eta,ar,br 3069 parameter (eta=1.0d-9) 3070 3071 integer i 3072 3073!$OMP DO 3074 do i=1,nfft3d_map(nb) 3075 ar = dble(A(i)) 3076 br = dble(B(i)) 3077 if (dabs(ar) .le. eta) then 3078 B(i) = dcmplx(0.0d0,0.0d0) 3079 else 3080 B(i) = dcmplx(br/ar,0.0d0) 3081 end if 3082 end do 3083!$OMP END DO 3084 3085 return 3086 end 3087 3088 3089* *********************************** 3090* * * 3091* * C3dB_r_abs1 * 3092* * * 3093* *********************************** 3094 subroutine C3dB_r_abs1(nb,A) 3095 implicit none 3096 integer nb 3097 real*8 A(*) 3098 3099#include "C3dB.fh" 3100 3101 integer i 3102 3103!$OMP DO 3104 do i=1,n2ft3d_map(nb) 3105 A(i) = dabs(A(i)) 3106 end do 3107!$OMP END DO 3108 3109 return 3110 end 3111 3112 3113* *********************************** 3114* * * 3115* * C3dB_c_abs1 * 3116* * * 3117* *********************************** 3118 subroutine C3dB_c_abs1(nb,A) 3119 implicit none 3120 integer nb 3121 complex*16 A(*) 3122 3123#include "C3dB.fh" 3124 3125 integer i 3126 3127!$OMP DO 3128 do i=1,nfft3d_map(nb) 3129 A(i) = dcmplx(dabs(dble(A(i))),dabs(dimag(A(i)))) 3130 end do 3131!$OMP END DO 3132 3133 return 3134 end 3135 3136 3137* *********************************** 3138* * * 3139* * C3dB_r_thresh * 3140* * * 3141* *********************************** 3142 3143 subroutine C3dB_r_thresh(nb,eta,A) 3144 implicit none 3145 integer nb 3146 real*8 eta 3147 real*8 A(*) 3148 3149#include "C3dB.fh" 3150 3151 integer i 3152 3153!$OMP DO 3154 do i=1,n2ft3d_map(nb) 3155 if (dabs(A(i)).lt.eta) A(i) = 0.0d0 3156 end do 3157!$OMP END DO 3158 3159 return 3160 end 3161 3162 3163 3164* *********************************** 3165* * * 3166* * C3dB_rr_Minus * 3167* * * 3168* *********************************** 3169 subroutine C3dB_rr_Minus(nb,A,B,C) 3170 implicit none 3171 integer nb 3172 real*8 A(*) 3173 real*8 B(*) 3174 real*8 C(*) 3175 3176#include "C3dB.fh" 3177 3178 integer i 3179 3180!$OMP DO 3181 do i=1,n2ft3d_map(nb) 3182 C(i) = A(i) - B(i) 3183 end do 3184!$OMP END DO 3185 3186 return 3187 end 3188 3189 3190 3191 3192* *********************************** 3193* * * 3194* * C3dB_r_dsum * 3195* * * 3196* *********************************** 3197 3198 subroutine C3dB_r_dsum(nb,A,sumall) 3199 implicit none 3200 integer nb 3201 real*8 A(*) 3202 real*8 sumall 3203 3204#include "C3dB.fh" 3205 3206 integer i,np 3207 real*8 sum 3208 3209 call Parallel3d_np_i(np) 3210 3211* **** sum up dot product on this node **** 3212 sum = 0.0d0 3213 do i=1,n2ft3d_map(nb) 3214 sum = sum + A(i) 3215 end do 3216 3217* **** add up sums from other nodes **** 3218 if (np.gt.1) then 3219 call C3dB_SumAll(sum) 3220 end if 3221 3222 sumall = sum 3223 3224 return 3225 end 3226 3227* *********************************** 3228* * * 3229* * C3dB_c_dsum * 3230* * * 3231* *********************************** 3232 3233 subroutine C3dB_c_dsum(nb,A,sumall) 3234 implicit none 3235 integer nb 3236 complex*16 A(*) 3237 complex*16 sumall 3238 3239#include "C3dB.fh" 3240 3241 integer i,np 3242 complex*16 sum 3243 3244 call Parallel3d_np_i(np) 3245 3246* **** sum up dot product on this node **** 3247 sum = dcmplx(0.0d0,0.0d0) 3248 do i=1,nfft3d_map(nb) 3249 sum = sum + A(i) 3250 end do 3251 3252* **** add up sums from other nodes **** 3253 if (np.gt.1) then 3254 call C3dB_Vector_SumAll(2,sum) 3255 end if 3256 3257 sumall = sum 3258 3259 return 3260 end 3261 3262 3263 3264 3265* *********************************** 3266* * * 3267* * C3dB_cc_Vector_dot * 3268* * * 3269* *********************************** 3270 3271 subroutine C3dB_cc_Vector_dot(nb,nnfft3d,nn,ne,A,B,sumall) 3272 implicit none 3273 integer nb 3274 integer nnfft3d,nn,ne 3275 real*8 A(*) 3276 real*8 B(*) 3277 real*8 sumall(nn,nn) 3278 3279#include "C3dB.fh" 3280 3281 integer np 3282 integer n,m,shift1,shift2 3283 real*8 sum 3284 3285 real*8 ddot 3286 external ddot 3287 3288 call nwpw_timing_start(2) 3289 3290 call Parallel3d_np_i(np) 3291 3292* **** sum up dot product on this node **** 3293 do n=1,ne 3294 do m=n,ne 3295 3296 shift1 = 1 + (n-1)*nnfft3d*2 3297 shift2 = 1 + (m-1)*nnfft3d*2 3298 3299 sum = ddot(nfft3d_map(nb),A(shift1),2,B(shift2),2) 3300 > + ddot(nfft3d_map(nb),A(shift1+1),2,B(shift2+1),2) 3301 3302 sumall(n,m) = sum 3303 sumall(m,n) = sum 3304 end do 3305 end do 3306 3307 3308* **** add up sums from other nodes **** 3309 if (np.gt.1) then 3310 call C3dB_Vector_SumAll(nn*ne,sumall) 3311 end if 3312 3313 call nwpw_timing_end(2) 3314 3315 return 3316 end 3317 3318 3319 3320* *********************************** 3321* * * 3322* * C3dB_cc_Vector_ndot * 3323* * * 3324* *********************************** 3325 3326 subroutine C3dB_cc_Vector_ndot(nb,nnfft3d,ne,A,B,sumall) 3327 implicit none 3328 integer nb 3329 integer nnfft3d,ne 3330 real*8 A(*) 3331 real*8 B(*) 3332 real*8 sumall(ne) 3333 3334#include "C3dB.fh" 3335 3336 integer np 3337 integer n,shift1 3338 real*8 sum 3339 3340 3341 real*8 ddot 3342 external ddot 3343 3344 call nwpw_timing_start(2) 3345 3346 call Parallel3d_np_i(np) 3347 3348* **** sum up dot product on this node **** 3349 do n=1,ne 3350 3351 shift1 = 1 + (n-1)*nnfft3d*2 3352 sum = ddot(nfft3d_map(nb),A(shift1),2,B(1),2) 3353 > + ddot(nfft3d_map(nb),A(shift1+1),2,B(2),2) 3354 3355 sumall(n) = sum 3356 end do 3357 3358* **** add up sums from other nodes **** 3359 if (np.gt.1) then 3360 call C3dB_Vector_SumAll(ne,sumall) 3361 end if 3362 3363 call nwpw_timing_end(2) 3364 return 3365 end 3366 3367 3368 3369* *********************************** 3370* * * 3371* * C3dB_ic_Mul * 3372* * * 3373* *********************************** 3374 3375 subroutine C3dB_ic_Mul(nb,A,B,C) 3376 implicit none 3377 integer nb 3378 real*8 A(*) 3379 complex*16 B(*) 3380 complex*16 C(*) 3381 3382#include "C3dB.fh" 3383 3384 integer i 3385 3386!$OMP DO 3387 do i=1,nfft3d_map(nb) 3388 C(i) = dcmplx(0.0d0,A(i)) * B(i) 3389 end do 3390!$OMP END DO 3391 3392 return 3393 end 3394 3395 3396 3397* *********************************** 3398* * * 3399* * C3dB_D3dB_r_Copy * 3400* * * 3401* *********************************** 3402 3403 subroutine C3dB_D3dB_r_Copy(nb,A_c3db,B_d3db) 3404 implicit none 3405 integer nb 3406 real*8 A_c3db(*) 3407 real*8 B_d3db(*) 3408 3409#include "C3dB.fh" 3410 3411 integer q,indx1,indx2,nqq 3412 3413 !**** slab mapping **** 3414 if (mapping.eq.1) then 3415 nqq = nq(nb)*ny(nb) 3416 !**** hilbert mapping **** 3417 else 3418 nqq = nq1(nb) 3419 end if 3420 3421 indx1 = 1 3422 indx2 = 1 3423 do q=1,nqq 3424 call dcopy(nx(nb),A_c3db(indx1),1,B_d3dB(indx2),1) 3425 indx1 = indx1 + nx(nb) 3426 indx2 = indx2 + (nx(nb)+2) 3427 end do 3428 call dcopy(nqq,0.0d0,0,B_d3db(nx(nb)+1),nx(nb)+2) 3429 call dcopy(nqq,0.0d0,0,B_d3db(nx(nb)+2),nx(nb)+2) 3430 3431 return 3432 end 3433 3434 3435* *********************************** 3436* * * 3437* * D3dB_C3dB_r_Copy * 3438* * * 3439* *********************************** 3440 3441 subroutine D3dB_C3dB_r_Copy(nb,A_d3db,B_c3db) 3442 implicit none 3443 integer nb 3444 real*8 A_d3db(*) 3445 real*8 B_c3db(*) 3446 3447#include "C3dB.fh" 3448 3449 integer q,indx1,indx2,nqq 3450 3451 !**** slab mapping **** 3452 if (mapping.eq.1) then 3453 nqq = nq(nb)*ny(nb) 3454 !**** hilbert mapping **** 3455 else 3456 nqq = nq1(nb) 3457 end if 3458 3459 indx1 = 1 3460 indx2 = 1 3461 do q=1,nqq 3462 call dcopy(nx(nb),A_d3db(indx1),1,B_c3dB(indx2),1) 3463 indx1 = indx1 + (nx(nb)+2) 3464 indx2 = indx2 + nx(nb) 3465 end do 3466 return 3467 end 3468 3469 3470 subroutine C3dB_pfft_index1_copy(n,index,a,b) 3471 implicit none 3472 integer n 3473 integer index(*) 3474 complex*16 a(*),b(*) 3475 integer i 3476#ifndef CRAY 3477!DIR$ ivdep 3478#endif 3479!$OMP DO 3480 do i=1,n 3481 b(i) = a(index(i)) 3482 end do 3483!$OMP END DO 3484 return 3485 end 3486 3487 subroutine C3dB_pfft_index2_copy(n,index,a,b) 3488 implicit none 3489 integer n 3490 integer index(*) 3491 complex*16 a(*),b(*) 3492 integer i 3493#ifndef CRAY 3494!DIR$ ivdep 3495#endif 3496!$OMP DO 3497 do i=1,n 3498 b(index(i)) = a(i) 3499 end do 3500!$OMP END DO 3501 return 3502 end 3503 3504 subroutine C3dB_pfft_index2_zero(n,index,a) 3505 implicit none 3506 integer n 3507 integer index(*) 3508 complex*16 a(*) 3509 integer i 3510#ifndef CRAY 3511!DIR$ ivdep 3512#endif 3513!$OMP DO 3514 do i=1,n 3515 a(index(i)) = 0.0d0 3516 end do 3517!$OMP END DO 3518 return 3519 end 3520 3521