1! --- 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt . 6! See Docs/Contributors.txt for a list of contributors. 7! --- 8 module m_fft_gpfa 9! 10! 1d fft routine based on Clive Temperton's GPFA package 11! 12 implicit none 13 14 ! Only double precision support in this module 15 integer, parameter :: dp = selected_real_kind(10,100) 16 17 18 ! This module is not thread safe due to the use of 19 ! these global variables. 20 ! To make it thread-safe, and to have more control over 21 ! the re-use of the auxiliary twiddle factors, you might 22 ! define a derived type to hold them and call the wrapper routines 23 ! with it as one of the arguments. 24 25 real(dp), allocatable, save :: trigs(:) 26 integer, save :: nold = -1 27 !---------------------------------------------------------- 28 ! auxiliary variable 29 character(len=20) :: msg 30 31 public :: nfft ! Generates N compatible with the fft 32 33 public :: fft_gpfa_ez ! Simple version with complex array 34 public :: fft_gpfa ! General version with strides, 35 ! multiple vectors, etc 36 37 ! Wrapper for setgpfa which returns proper length of work array 38 public :: setgpfa_check 39 40 ! Access to the standard legacy routines, if needed 41 public :: gpfa, setgpfa 42 43 private 44 45 CONTAINS 46 47 SUBROUTINE nfft( N ) 48 49 integer, intent(inout) :: N 50 51C CHANGES N INTO THE NEXT INTEGER ALLOWED BY THE FFT ROUTINE 52C WRITTEN BY J.M.SOLER. MAY/95. 53! Modified by A. Garcia, May 2015 54 55 integer :: np, nmax, nmin, nrem, ip 56 parameter (NP = 3, NMAX = huge(N)) ! no limits with huge 57 integer :: IPRIME(NP) 58 data IPRIME / 2, 3, 5 / 59 60 NMIN = N 61 DO N = NMIN, NMAX 62 NREM = N 63 DO IP = 1,NP 64 10 CONTINUE 65 IF ( MODULO( NREM, IPRIME(IP) ) .EQ. 0 ) THEN 66 NREM = NREM / IPRIME(IP) 67 GOTO 10 68 ENDIF 69 ENDDO 70 IF (NREM .EQ. 1) RETURN 71 ENDDO 72 write(msg,"(i0)") NMIN 73 call die('NFFT: NO SUITABLE INTEGER FOUND FOR N =' // trim(msg)) 74 END SUBROUTINE nfft 75 76 subroutine fft_gpfa_ez(data,n,isign) 77 78 ! packed complex array in a real array twice as long 79 80 integer, intent(in) :: n 81 real(dp), intent(inout) :: data(2*n) 82 integer, intent(in) :: isign 83 84! ISIGN = +1 FOR FORWARD TRANSFORM 85! = -1 FOR INVERSE TRANSFORM 86 87 ! If n has changed since the last use, 88 ! set trigs array, used by 1D-FFT routine gpfa 89 90 ! The routine setgpfa_alloc is a special version 91 ! of setgpfa which (re)allocates the trigs argument 92 ! if needed, and re-fills it with new twiddle factors 93 94 if (n /= nold) then 95 call setgpfa_alloc( trigs, n ) 96 end if 97 98 ! FFT. Arguments of gpfa are: 99 ! gpfa( realData(*), imagData(*), trigs(*), 100 ! dataSpan, vectorSpan, vectorSize, numVectors, fftDir ) 101 ! 102 ! Note dataSpan=2 since we are packing the real and imaginary 103 ! parts in the same vector. 104 105 call gpfa( data(1), data(2), trigs, 106 $ 2, 1, n, 1, isign ) 107 108 nold = n 109 110 end subroutine fft_gpfa_ez 111 112 subroutine fft_gpfa(realData, imagData, 113 $ dataSpan, vectorSpan, N, numVectors, 114 $ isign ) 115 116 ! A wrapper for the GPFA routine which handles the 117 ! work array automatically 118 119 real(dp), intent(inout) :: realData(*), imagData(*) 120 integer, intent(in) :: dataSpan, vectorSpan, N, numVectors 121 integer, intent(in) :: isign 122 123! ISIGN = +1 FOR FORWARD TRANSFORM 124! = -1 FOR INVERSE TRANSFORM 125 126 ! Set trigs array, used by 1D-FFT routine gpfa 127 ! The routine setgpfa_alloc is a special version 128 ! of setgpfa which (re)allocates the trigs argument 129 ! if needed, and re-fills it with new twiddle factors 130 ! if n has changed since the last use. 131 132 if (n /= nold) then 133 call setgpfa_alloc( trigs, n ) 134 end if 135 136 call gpfa(realData, imagData, trigs, 137 $ dataSpan, vectorSpan, N, numVectors, isign) 138 139 nold = n 140 141 end subroutine fft_gpfa 142!------------------------------------------------------------------ 143 subroutine setgpfa_alloc( trigs, n ) 144 real(dp), allocatable, intent(inout) :: trigs(:) 145 integer, intent(in) :: n 146 147 integer :: ntrigs 148 149 if (.not. allocated(trigs)) then 150 allocate(trigs(100)) 151 endif 152 call setgpfa_check(trigs,size(trigs),ntrigs,n) 153 154 if (size(trigs) < ntrigs) then 155 deallocate(trigs) 156 allocate(trigs(ntrigs)) 157 call setgpfa_check(trigs,size(trigs),ntrigs,n) 158 if (size(trigs) < ntrigs) STOP "ntrigs error" 159 endif 160 161 end subroutine setgpfa_alloc 162! 163 SUBROUTINE SETGPFA_check(TRIGS,maxtrigs,ntrigs,N) 164* 165 integer, intent(in) :: maxtrigs ! The current size of trigs 166 REAL(dp) TRIGS(maxtrigs) 167 integer, intent(out) :: ntrigs ! The needed size for trigs 168 integer, intent(in) :: N 169 170 INTEGER NJ(3) 171 integer :: nn, ifac, ll, kk, ip, iq, ir, 172 $ i, ni, irot, kink, k 173 real(dp) :: angle, twopi, del 174* 175* DECOMPOSE N INTO FACTORS 2,3,5 176* ------------------------------ 177 NN = N 178 IFAC = 2 179* 180 DO 30 LL = 1 , 3 181 KK = 0 182 10 CONTINUE 183 IF (MOD(NN,IFAC).NE.0) GO TO 20 184 KK = KK + 1 185 NN = NN / IFAC 186 GO TO 10 187 20 CONTINUE 188 NJ(LL) = KK 189 IFAC = IFAC + LL 190 30 CONTINUE 191* 192 IF (NN.NE.1) THEN 193 write(msg,"(i0)") N 194 call die('GPFA: '//trim(msg)//' IS NOT A LEGAL VALUE OF N') 195 ENDIF 196* 197 IP = NJ(1) 198 IQ = NJ(2) 199 IR = NJ(3) 200* 201* COMPUTE LIST OF ROTATED TWIDDLE FACTORS 202* --------------------------------------- 203 NJ(1) = 2**IP 204 NJ(2) = 3**IQ 205 NJ(3) = 5**IR 206* 207 TWOPI = 4.0_dp * ASIN(1.0_dp) 208 I = 1 209* 210 DO 60 LL = 1 , 3 211 NI = NJ(LL) 212 IF (NI.EQ.1) GO TO 60 213* 214 DEL = TWOPI / real(NI,kind=dp) 215 IROT = N / NI 216 KINK = MOD(IROT,NI) 217 KK = 0 218* 219 DO 50 K = 1 , NI 220 ANGLE = real(KK,kind=dp) * DEL 221 if (i.lt.maxtrigs) then 222 TRIGS(I) = COS(ANGLE) 223 TRIGS(I+1) = SIN(ANGLE) 224 endif 225 I = I + 2 226 KK = KK + KINK 227 IF (KK.GT.NI) KK = KK - NI 228 50 CONTINUE 229 60 CONTINUE 230 ntrigs = i - 1 ! note boundary. i is even 231* 232 RETURN 233 END SUBROUTINE SETGPFA_check 234 235!-------------------------------------------------------------- 236! 237! What follows is the original code by Temperton 238! with explicit declarations of variables and converted to 'dp'. 239! 240! For arbitrary-precision support the parameter constants might 241! be computed instead of inserted explicitly. 242! 243! For example: sin60 = sin(2.0_wp*pi/3.0_wp), where wp is the working 244! precision, and 245! pi = 4.0_wp * atan(1.0_wp), or 246! twopi = 4.0_wp * asin(1.0_wp) 247! 248!********************************************************************* 249! 250! SUBROUTINE 'SETGPFA' 251! SETUP ROUTINE FOR SELF-SORTING IN-PLACE 252! GENERALIZED PRIME FACTOR (COMPLEX) FFT [GPFA] 253! 254! CALL SETGPFA(TRIGS,N) 255! 256! INPUT : 257! ----- 258! N IS THE LENGTH OF THE TRANSFORMS. N MUST BE OF THE FORM: 259! ----------------------------------- 260! N = (2**IP) * (3**IQ) * (5**IR) 261! ----------------------------------- 262! 263! OUTPUT: 264! ------ 265! TRIGS IS A TABLE OF TWIDDLE FACTORS, 266! OF LENGTH 2*IPQR (REAL) WORDS, WHERE: 267! -------------------------------------- 268! IPQR = (2**IP) + (3**IQ) + (5**IR) 269! -------------------------------------- 270! 271! WRITTEN BY CLIVE TEMPERTON 1990 272! 273!********************************************************************* 274! 275 SUBROUTINE SETGPFA(TRIGS,N) 276* 277 278 INTEGER NJ(3) 279 REAL(dp) TRIGS(*) 280 281 integer :: nn, n, ifac, ll, kk, ip, iq, ir, 282 $ i, ni, irot, kink, k 283 real(dp) :: angle, twopi, del 284* 285* DECOMPOSE N INTO FACTORS 2,3,5 286* ------------------------------ 287 NN = N 288 IFAC = 2 289* 290 DO 30 LL = 1 , 3 291 KK = 0 292 10 CONTINUE 293 IF (MOD(NN,IFAC).NE.0) GO TO 20 294 KK = KK + 1 295 NN = NN / IFAC 296 GO TO 10 297 20 CONTINUE 298 NJ(LL) = KK 299 IFAC = IFAC + LL 300 30 CONTINUE 301* 302 IF (NN.NE.1) THEN 303 write(msg,"(i0)") N 304 call die('GPFA: '//trim(msg)//' IS NOT A LEGAL VALUE OF N') 305 ENDIF 306* 307 IP = NJ(1) 308 IQ = NJ(2) 309 IR = NJ(3) 310* 311* COMPUTE LIST OF ROTATED TWIDDLE FACTORS 312* --------------------------------------- 313 NJ(1) = 2**IP 314 NJ(2) = 3**IQ 315 NJ(3) = 5**IR 316* 317 TWOPI = 4.0_dp * ASIN(1.0_dp) 318 I = 1 319* 320 DO 60 LL = 1 , 3 321 NI = NJ(LL) 322 IF (NI.EQ.1) GO TO 60 323* 324 DEL = TWOPI / real(NI,kind=dp) 325 IROT = N / NI 326 KINK = MOD(IROT,NI) 327 KK = 0 328* 329 DO 50 K = 1 , NI 330 ANGLE = real(KK,kind=dp) * DEL 331 TRIGS(I) = COS(ANGLE) 332 TRIGS(I+1) = SIN(ANGLE) 333 I = I + 2 334 KK = KK + KINK 335 IF (KK.GT.NI) KK = KK - NI 336 50 CONTINUE 337 60 CONTINUE 338* 339 RETURN 340 END SUBROUTINE SETGPFA 341 342 343! SUBROUTINE 'GPFA' 344! SELF-SORTING IN-PLACE GENERALIZED PRIME FACTOR (COMPLEX) FFT 345! 346! *** THIS IS THE ALL-FORTRAN VERSION *** 347! ------------------------------- 348! 349! CALL GPFA(A,B,TRIGS,INC,JUMP,N,LOT,ISIGN) 350! 351! A IS FIRST REAL INPUT/OUTPUT VECTOR 352! B IS FIRST IMAGINARY INPUT/OUTPUT VECTOR 353! TRIGS IS A TABLE OF TWIDDLE FACTORS, PRECALCULATED 354! BY CALLING SUBROUTINE 'SETGPFA' 355! INC IS THE INCREMENT WITHIN EACH DATA VECTOR 356! JUMP IS THE INCREMENT BETWEEN DATA VECTORS 357! N IS THE LENGTH OF THE TRANSFORMS: 358! ----------------------------------- 359! N = (2**IP) * (3**IQ) * (5**IR) 360! ----------------------------------- 361! LOT IS THE NUMBER OF TRANSFORMS 362! ISIGN = +1 FOR FORWARD TRANSFORM 363! = -1 FOR INVERSE TRANSFORM 364! 365! WRITTEN BY CLIVE TEMPERTON 366! RECHERCHE EN PREVISION NUMERIQUE 367! ATMOSPHERIC ENVIRONMENT SERVICE, CANADA 368! 369!---------------------------------------------------------------------- 370! 371! DEFINITION OF TRANSFORM 372! ----------------------- 373! 374! X(J) = SUM(K=0,...,N-1)(C(K)*EXP(ISIGN*2*I*J*K*PI/N)) 375! 376!--------------------------------------------------------------------- 377! 378! FOR A MATHEMATICAL DEVELOPMENT OF THE ALGORITHM USED, 379! SEE: 380! 381! C TEMPERTON : "A GENERALIZED PRIME FACTOR FFT ALGORITHM 382! FOR ANY N = (2**P)(3**Q)(5**R)", 383! SIAM J. SCI. STAT. COMP., MAY 1992. 384! 385 386 SUBROUTINE GPFA(A,B,TRIGS,INC,JUMP,N,LOT,ISIGN) 387 388 IMPLICIT NONE 389* 390 REAL(dp) :: A(*), B(*) 391 REAL(dp) TRIGS(*) 392 INTEGER INC, JUMP, N, LOT, ISIGN 393C 394 INTEGER 395 . NJ(3), NN, IFAC, LL, KK, IP, IQ, IR, I 396* 397* DECOMPOSE N INTO FACTORS 2,3,5 398* ------------------------------ 399 NN = N 400 IFAC = 2 401* 402 DO 30 LL = 1 , 3 403 KK = 0 404 10 CONTINUE 405 IF (MOD(NN,IFAC).NE.0) GO TO 20 406 KK = KK + 1 407 NN = NN / IFAC 408 GO TO 10 409 20 CONTINUE 410 NJ(LL) = KK 411 IFAC = IFAC + LL 412 30 CONTINUE 413* 414 IF (NN.NE.1) THEN 415 write(msg,"(i0)") N 416 call die('GPFA: '//trim(msg)//' IS NOT A LEGAL VALUE OF N') 417 ENDIF 418* 419 IP = NJ(1) 420 IQ = NJ(2) 421 IR = NJ(3) 422* 423* COMPUTE THE TRANSFORM 424* --------------------- 425 I = 1 426 IF (IP.GT.0) THEN 427 CALL GPFA2F(A,B,TRIGS,INC,JUMP,N,IP,LOT,ISIGN) 428 I = I + 2 * ( 2**IP) 429 ENDIF 430 IF (IQ.GT.0) THEN 431 CALL GPFA3F(A,B,TRIGS(I),INC,JUMP,N,IQ,LOT,ISIGN) 432 I = I + 2 * (3**IQ) 433 ENDIF 434 IF (IR.GT.0) THEN 435 CALL GPFA5F(A,B,TRIGS(I),INC,JUMP,N,IR,LOT,ISIGN) 436 ENDIF 437* 438 RETURN 439 END SUBROUTINE GPFA 440 441 442!********************************************************************* 443 444* fortran version of *gpfa2* - 445* radix-2 section of self-sorting, in-place, generalized pfa 446* central radix-2 and radix-8 passes included 447* so that transform length can be any power of 2 448* 449 450 subroutine gpfa2f(a,b,trigs,inc,jump,n,mm,lot,isign) 451 452 real(dp) :: a(*), b(*) 453 real(dp) :: trigs(*) 454 integer inc, jump, n, mm, lot, isign 455 456 integer n2, inq, jstepx, ninc, ink, m2, m8, 457 $ m, mh, nblox, left, istart, nb, nvex, 458 $ la, mu, ipass, jstep, jstepl, jjj, ja, 459 $ nu, jb, jc, jd, j, l, kk, k, je, jf, 460 $ jg, jh, laincl, ll, ji, jj, jk, jl, jm, 461 $ jn, jo, jp 462 463 real(dp) :: s, aja, ajc, t0, t2, ajb, ajd, 464 $ t1, t3, bja, bjc, u0, u2, bjb, 465 $ bjd, u1, u3, co1, si1, co2, si2, 466 $ co3, si3, ss, c1, c2, c3, aje, 467 $ ajf, ajg, ajh, bje, bjf, bjg, 468 $ bjh, co4, si4, co5, si5, co6, si6, 469 $ co7, si7, aji, bjm, ajj, bjj, ajk, 470 $ ajl, bji, bjk, ajo, bjl, bjo, ajm, 471 $ ajn, ajp, bjn, bjp 472 473#ifdef OLD_CRAY 474 integer, parameter :: lvr = CRAY_LVR_PARAMETER 475#else 476 integer, parameter :: lvr = 1024 477#endif 478* 479* *************************************************************** 480* * * 481* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. * 482* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE * 483* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. * 484* * * 485* *************************************************************** 486* 487 n2 = 2**mm 488 inq = n/n2 489 jstepx = (n2-n) * inc 490 ninc = n * inc 491 ink = inc * inq 492* 493 m2 = 0 494 m8 = 0 495 if (mod(mm,2).eq.0) then 496 m = mm/2 497 else if (mod(mm,4).eq.1) then 498 m = (mm-1)/2 499 m2 = 1 500 else if (mod(mm,4).eq.3) then 501 m = (mm-3)/2 502 m8 = 1 503 endif 504 mh = (m+1)/2 505* 506 nblox = 1 + (lot-1)/lvr 507 left = lot 508 s = real(isign,kind=dp) 509 istart = 1 510* 511* loop on blocks of lvr transforms 512* -------------------------------- 513 do 500 nb = 1 , nblox 514* 515 if (left.le.lvr) then 516 nvex = left 517 else if (left.lt.(2*lvr)) then 518 nvex = left/2 519 nvex = nvex + mod(nvex,2) 520 else 521 nvex = lvr 522 endif 523 left = left - nvex 524* 525 la = 1 526* 527* loop on type I radix-4 passes 528* ----------------------------- 529 mu = mod(inq,4) 530 if (isign.eq.-1) mu = 4 - mu 531 ss = 1.0_dp 532 if (mu.eq.3) ss = -1.0_dp 533* 534 if (mh.eq.0) go to 200 535* 536 do 160 ipass = 1 , mh 537 jstep = (n*inc) / (4*la) 538 jstepl = jstep - ninc 539* 540* k = 0 loop (no twiddle factors) 541* ------------------------------- 542 do 120 jjj = 0 , (n-1)*inc , 4*jstep 543 ja = istart + jjj 544* 545* "transverse" loop 546* ----------------- 547 do 115 nu = 1 , inq 548 jb = ja + jstepl 549 if (jb.lt.istart) jb = jb + ninc 550 jc = jb + jstepl 551 if (jc.lt.istart) jc = jc + ninc 552 jd = jc + jstepl 553 if (jd.lt.istart) jd = jd + ninc 554 j = 0 555* 556* loop across transforms 557* ---------------------- 558#ifdef OLD_CRAY 559cdir$ ivdep, shortloop 560#endif 561 do 110 l = 1 , nvex 562 aja = a(ja+j) 563 ajc = a(jc+j) 564 t0 = aja + ajc 565 t2 = aja - ajc 566 ajb = a(jb+j) 567 ajd = a(jd+j) 568 t1 = ajb + ajd 569 t3 = ss * ( ajb - ajd ) 570 bja = b(ja+j) 571 bjc = b(jc+j) 572 u0 = bja + bjc 573 u2 = bja - bjc 574 bjb = b(jb+j) 575 bjd = b(jd+j) 576 u1 = bjb + bjd 577 u3 = ss * ( bjb - bjd ) 578 a(ja+j) = t0 + t1 579 a(jc+j) = t0 - t1 580 b(ja+j) = u0 + u1 581 b(jc+j) = u0 - u1 582 a(jb+j) = t2 - u3 583 a(jd+j) = t2 + u3 584 b(jb+j) = u2 + t3 585 b(jd+j) = u2 - t3 586 j = j + jump 587 110 continue 588 ja = ja + jstepx 589 if (ja.lt.istart) ja = ja + ninc 590 115 continue 591 120 continue 592* 593* finished if n2 = 4 594* ------------------ 595 if (n2.eq.4) go to 490 596 kk = 2 * la 597* 598* loop on nonzero k 599* ----------------- 600 do 150 k = ink , jstep-ink , ink 601 co1 = trigs(kk+1) 602 si1 = s*trigs(kk+2) 603 co2 = trigs(2*kk+1) 604 si2 = s*trigs(2*kk+2) 605 co3 = trigs(3*kk+1) 606 si3 = s*trigs(3*kk+2) 607* 608* loop along transform 609* -------------------- 610 do 140 jjj = k , (n-1)*inc , 4*jstep 611 ja = istart + jjj 612* 613* "transverse" loop 614* ----------------- 615 do 135 nu = 1 , inq 616 jb = ja + jstepl 617 if (jb.lt.istart) jb = jb + ninc 618 jc = jb + jstepl 619 if (jc.lt.istart) jc = jc + ninc 620 jd = jc + jstepl 621 if (jd.lt.istart) jd = jd + ninc 622 j = 0 623* 624* loop across transforms 625* ---------------------- 626#ifdef OLD_CRAY 627cdir$ ivdep, shortloop 628#endif 629 do 130 l = 1 , nvex 630 aja = a(ja+j) 631 ajc = a(jc+j) 632 t0 = aja + ajc 633 t2 = aja - ajc 634 ajb = a(jb+j) 635 ajd = a(jd+j) 636 t1 = ajb + ajd 637 t3 = ss * ( ajb - ajd ) 638 bja = b(ja+j) 639 bjc = b(jc+j) 640 u0 = bja + bjc 641 u2 = bja - bjc 642 bjb = b(jb+j) 643 bjd = b(jd+j) 644 u1 = bjb + bjd 645 u3 = ss * ( bjb - bjd ) 646 a(ja+j) = t0 + t1 647 b(ja+j) = u0 + u1 648 a(jb+j) = co1*(t2-u3) - si1*(u2+t3) 649 b(jb+j) = si1*(t2-u3) + co1*(u2+t3) 650 a(jc+j) = co2*(t0-t1) - si2*(u0-u1) 651 b(jc+j) = si2*(t0-t1) + co2*(u0-u1) 652 a(jd+j) = co3*(t2+u3) - si3*(u2-t3) 653 b(jd+j) = si3*(t2+u3) + co3*(u2-t3) 654 j = j + jump 655 130 continue 656*-----( end of loop across transforms ) 657 ja = ja + jstepx 658 if (ja.lt.istart) ja = ja + ninc 659 135 continue 660 140 continue 661*-----( end of loop along transforms ) 662 kk = kk + 2*la 663 150 continue 664*-----( end of loop on nonzero k ) 665 la = 4*la 666 160 continue 667*-----( end of loop on type I radix-4 passes) 668* 669* central radix-2 pass 670* -------------------- 671 200 continue 672 if (m2.eq.0) go to 300 673* 674 jstep = (n*inc) / (2*la) 675 jstepl = jstep - ninc 676* 677* k=0 loop (no twiddle factors) 678* ----------------------------- 679 do 220 jjj = 0 , (n-1)*inc , 2*jstep 680 ja = istart + jjj 681* 682* "transverse" loop 683* ----------------- 684 do 215 nu = 1 , inq 685 jb = ja + jstepl 686 if (jb.lt.istart) jb = jb + ninc 687 j = 0 688* 689* loop across transforms 690* ---------------------- 691#ifdef OLD_CRAY 692cdir$ ivdep, shortloop 693#endif 694 do 210 l = 1 , nvex 695 aja = a(ja+j) 696 ajb = a(jb+j) 697 t0 = aja - ajb 698 a(ja+j) = aja + ajb 699 a(jb+j) = t0 700 bja = b(ja+j) 701 bjb = b(jb+j) 702 u0 = bja - bjb 703 b(ja+j) = bja + bjb 704 b(jb+j) = u0 705 j = j + jump 706 210 continue 707*-----(end of loop across transforms) 708 ja = ja + jstepx 709 if (ja.lt.istart) ja = ja + ninc 710 215 continue 711 220 continue 712* 713* finished if n2=2 714* ---------------- 715 if (n2.eq.2) go to 490 716* 717 kk = 2 * la 718* 719* loop on nonzero k 720* ----------------- 721 do 260 k = ink , jstep - ink , ink 722 co1 = trigs(kk+1) 723 si1 = s*trigs(kk+2) 724* 725* loop along transforms 726* --------------------- 727 do 250 jjj = k , (n-1)*inc , 2*jstep 728 ja = istart + jjj 729* 730* "transverse" loop 731* ----------------- 732 do 245 nu = 1 , inq 733 jb = ja + jstepl 734 if (jb.lt.istart) jb = jb + ninc 735 j = 0 736* 737* loop across transforms 738* ---------------------- 739 if (kk.eq.n2/2) then 740#ifdef OLD_CRAY 741cdir$ ivdep, shortloop 742#endif 743 do 230 l = 1 , nvex 744 aja = a(ja+j) 745 ajb = a(jb+j) 746 t0 = ss * ( aja - ajb ) 747 a(ja+j) = aja + ajb 748 bjb = b(jb+j) 749 bja = b(ja+j) 750 a(jb+j) = ss * ( bjb - bja ) 751 b(ja+j) = bja + bjb 752 b(jb+j) = t0 753 j = j + jump 754 230 continue 755* 756 else 757* 758#ifdef OLD_CRAY 759cdir$ ivdep, shortloop 760#endif 761 do 240 l = 1 , nvex 762 aja = a(ja+j) 763 ajb = a(jb+j) 764 t0 = aja - ajb 765 a(ja+j) = aja + ajb 766 bja = b(ja+j) 767 bjb = b(jb+j) 768 u0 = bja - bjb 769 b(ja+j) = bja + bjb 770 a(jb+j) = co1*t0 - si1*u0 771 b(jb+j) = si1*t0 + co1*u0 772 j = j + jump 773 240 continue 774* 775 endif 776* 777*-----(end of loop across transforms) 778 ja = ja + jstepx 779 if (ja.lt.istart) ja = ja + ninc 780 245 continue 781 250 continue 782*-----(end of loop along transforms) 783 kk = kk + 2 * la 784 260 continue 785*-----(end of loop on nonzero k) 786*-----(end of radix-2 pass) 787* 788 la = 2 * la 789 go to 400 790* 791* central radix-8 pass 792* -------------------- 793 300 continue 794 if (m8.eq.0) go to 400 795 jstep = (n*inc) / (8*la) 796 jstepl = jstep - ninc 797 mu = mod(inq,8) 798 if (isign.eq.-1) mu = 8 - mu 799 c1 = 1.0_dp 800 if (mu.eq.3.or.mu.eq.7) c1 = -1.0_dp 801 c2 = sqrt(0.5_dp) 802 if (mu.eq.3.or.mu.eq.5) c2 = -c2 803 c3 = c1 * c2 804* 805* stage 1 806* ------- 807 do 320 k = 0 , jstep - ink , ink 808 do 315 jjj = k , (n-1)*inc , 8*jstep 809 ja = istart + jjj 810* 811* "transverse" loop 812* ----------------- 813 do 312 nu = 1 , inq 814 jb = ja + jstepl 815 if (jb.lt.istart) jb = jb + ninc 816 jc = jb + jstepl 817 if (jc.lt.istart) jc = jc + ninc 818 jd = jc + jstepl 819 if (jd.lt.istart) jd = jd + ninc 820 je = jd + jstepl 821 if (je.lt.istart) je = je + ninc 822 jf = je + jstepl 823 if (jf.lt.istart) jf = jf + ninc 824 jg = jf + jstepl 825 if (jg.lt.istart) jg = jg + ninc 826 jh = jg + jstepl 827 if (jh.lt.istart) jh = jh + ninc 828 j = 0 829#ifdef OLD_CRAY 830cdir$ ivdep, shortloop 831#endif 832 do 310 l = 1 , nvex 833 aja = a(ja+j) 834 aje = a(je+j) 835 t0 = aja - aje 836 a(ja+j) = aja + aje 837 ajc = a(jc+j) 838 ajg = a(jg+j) 839 t1 = c1 * ( ajc - ajg ) 840 a(je+j) = ajc + ajg 841 ajb = a(jb+j) 842 ajf = a(jf+j) 843 t2 = ajb - ajf 844 a(jc+j) = ajb + ajf 845 ajd = a(jd+j) 846 ajh = a(jh+j) 847 t3 = ajd - ajh 848 a(jg+j) = ajd + ajh 849 a(jb+j) = t0 850 a(jf+j) = t1 851 a(jd+j) = c2 * ( t2 - t3 ) 852 a(jh+j) = c3 * ( t2 + t3 ) 853 bja = b(ja+j) 854 bje = b(je+j) 855 u0 = bja - bje 856 b(ja+j) = bja + bje 857 bjc = b(jc+j) 858 bjg = b(jg+j) 859 u1 = c1 * ( bjc - bjg ) 860 b(je+j) = bjc + bjg 861 bjb = b(jb+j) 862 bjf = b(jf+j) 863 u2 = bjb - bjf 864 b(jc+j) = bjb + bjf 865 bjd = b(jd+j) 866 bjh = b(jh+j) 867 u3 = bjd - bjh 868 b(jg+j) = bjd + bjh 869 b(jb+j) = u0 870 b(jf+j) = u1 871 b(jd+j) = c2 * ( u2 - u3 ) 872 b(jh+j) = c3 * ( u2 + u3 ) 873 j = j + jump 874 310 continue 875 ja = ja + jstepx 876 if (ja.lt.istart) ja = ja + ninc 877 312 continue 878 315 continue 879 320 continue 880* 881* stage 2 882* ------- 883* 884* k=0 (no twiddle factors) 885* ------------------------ 886 do 330 jjj = 0 , (n-1)*inc , 8*jstep 887 ja = istart + jjj 888* 889* "transverse" loop 890* ----------------- 891 do 328 nu = 1 , inq 892 jb = ja + jstepl 893 if (jb.lt.istart) jb = jb + ninc 894 jc = jb + jstepl 895 if (jc.lt.istart) jc = jc + ninc 896 jd = jc + jstepl 897 if (jd.lt.istart) jd = jd + ninc 898 je = jd + jstepl 899 if (je.lt.istart) je = je + ninc 900 jf = je + jstepl 901 if (jf.lt.istart) jf = jf + ninc 902 jg = jf + jstepl 903 if (jg.lt.istart) jg = jg + ninc 904 jh = jg + jstepl 905 if (jh.lt.istart) jh = jh + ninc 906 j = 0 907#ifdef OLD_CRAY 908cdir$ ivdep, shortloop 909#endif 910 do 325 l = 1 , nvex 911 aja = a(ja+j) 912 aje = a(je+j) 913 t0 = aja + aje 914 t2 = aja - aje 915 ajc = a(jc+j) 916 ajg = a(jg+j) 917 t1 = ajc + ajg 918 t3 = c1 * ( ajc - ajg ) 919 bja = b(ja+j) 920 bje = b(je+j) 921 u0 = bja + bje 922 u2 = bja - bje 923 bjc = b(jc+j) 924 bjg = b(jg+j) 925 u1 = bjc + bjg 926 u3 = c1 * ( bjc - bjg ) 927 a(ja+j) = t0 + t1 928 a(je+j) = t0 - t1 929 b(ja+j) = u0 + u1 930 b(je+j) = u0 - u1 931 a(jc+j) = t2 - u3 932 a(jg+j) = t2 + u3 933 b(jc+j) = u2 + t3 934 b(jg+j) = u2 - t3 935 ajb = a(jb+j) 936 ajd = a(jd+j) 937 t0 = ajb + ajd 938 t2 = ajb - ajd 939 ajf = a(jf+j) 940 ajh = a(jh+j) 941 t1 = ajf - ajh 942 t3 = ajf + ajh 943 bjb = b(jb+j) 944 bjd = b(jd+j) 945 u0 = bjb + bjd 946 u2 = bjb - bjd 947 bjf = b(jf+j) 948 bjh = b(jh+j) 949 u1 = bjf - bjh 950 u3 = bjf + bjh 951 a(jb+j) = t0 - u3 952 a(jh+j) = t0 + u3 953 b(jb+j) = u0 + t3 954 b(jh+j) = u0 - t3 955 a(jd+j) = t2 + u1 956 a(jf+j) = t2 - u1 957 b(jd+j) = u2 - t1 958 b(jf+j) = u2 + t1 959 j = j + jump 960 325 continue 961 ja = ja + jstepx 962 if (ja.lt.istart) ja = ja + ninc 963 328 continue 964 330 continue 965* 966 if (n2.eq.8) go to 490 967* 968* loop on nonzero k 969* ----------------- 970 kk = 2 * la 971* 972 do 350 k = ink , jstep - ink , ink 973* 974 co1 = trigs(kk+1) 975 si1 = s * trigs(kk+2) 976 co2 = trigs(2*kk+1) 977 si2 = s * trigs(2*kk+2) 978 co3 = trigs(3*kk+1) 979 si3 = s * trigs(3*kk+2) 980 co4 = trigs(4*kk+1) 981 si4 = s * trigs(4*kk+2) 982 co5 = trigs(5*kk+1) 983 si5 = s * trigs(5*kk+2) 984 co6 = trigs(6*kk+1) 985 si6 = s * trigs(6*kk+2) 986 co7 = trigs(7*kk+1) 987 si7 = s * trigs(7*kk+2) 988* 989 do 345 jjj = k , (n-1)*inc , 8*jstep 990 ja = istart + jjj 991* 992* "transverse" loop 993* ----------------- 994 do 342 nu = 1 , inq 995 jb = ja + jstepl 996 if (jb.lt.istart) jb = jb + ninc 997 jc = jb + jstepl 998 if (jc.lt.istart) jc = jc + ninc 999 jd = jc + jstepl 1000 if (jd.lt.istart) jd = jd + ninc 1001 je = jd + jstepl 1002 if (je.lt.istart) je = je + ninc 1003 jf = je + jstepl 1004 if (jf.lt.istart) jf = jf + ninc 1005 jg = jf + jstepl 1006 if (jg.lt.istart) jg = jg + ninc 1007 jh = jg + jstepl 1008 if (jh.lt.istart) jh = jh + ninc 1009 j = 0 1010#ifdef OLD_CRAY 1011cdir$ ivdep, shortloop 1012#endif 1013 do 340 l = 1 , nvex 1014 aja = a(ja+j) 1015 aje = a(je+j) 1016 t0 = aja + aje 1017 t2 = aja - aje 1018 ajc = a(jc+j) 1019 ajg = a(jg+j) 1020 t1 = ajc + ajg 1021 t3 = c1 * ( ajc - ajg ) 1022 bja = b(ja+j) 1023 bje = b(je+j) 1024 u0 = bja + bje 1025 u2 = bja - bje 1026 bjc = b(jc+j) 1027 bjg = b(jg+j) 1028 u1 = bjc + bjg 1029 u3 = c1 * ( bjc - bjg ) 1030 a(ja+j) = t0 + t1 1031 b(ja+j) = u0 + u1 1032 a(je+j) = co4*(t0-t1) - si4*(u0-u1) 1033 b(je+j) = si4*(t0-t1) + co4*(u0-u1) 1034 a(jc+j) = co2*(t2-u3) - si2*(u2+t3) 1035 b(jc+j) = si2*(t2-u3) + co2*(u2+t3) 1036 a(jg+j) = co6*(t2+u3) - si6*(u2-t3) 1037 b(jg+j) = si6*(t2+u3) + co6*(u2-t3) 1038 ajb = a(jb+j) 1039 ajd = a(jd+j) 1040 t0 = ajb + ajd 1041 t2 = ajb - ajd 1042 ajf = a(jf+j) 1043 ajh = a(jh+j) 1044 t1 = ajf - ajh 1045 t3 = ajf + ajh 1046 bjb = b(jb+j) 1047 bjd = b(jd+j) 1048 u0 = bjb + bjd 1049 u2 = bjb - bjd 1050 bjf = b(jf+j) 1051 bjh = b(jh+j) 1052 u1 = bjf - bjh 1053 u3 = bjf + bjh 1054 a(jb+j) = co1*(t0-u3) - si1*(u0+t3) 1055 b(jb+j) = si1*(t0-u3) + co1*(u0+t3) 1056 a(jh+j) = co7*(t0+u3) - si7*(u0-t3) 1057 b(jh+j) = si7*(t0+u3) + co7*(u0-t3) 1058 a(jd+j) = co3*(t2+u1) - si3*(u2-t1) 1059 b(jd+j) = si3*(t2+u1) + co3*(u2-t1) 1060 a(jf+j) = co5*(t2-u1) - si5*(u2+t1) 1061 b(jf+j) = si5*(t2-u1) + co5*(u2+t1) 1062 j = j + jump 1063 340 continue 1064 ja = ja + jstepx 1065 if (ja.lt.istart) ja = ja + ninc 1066 342 continue 1067 345 continue 1068 kk = kk + 2 * la 1069 350 continue 1070* 1071 la = 8 * la 1072* 1073* loop on type II radix-4 passes 1074* ------------------------------ 1075 400 continue 1076 mu = mod(inq,4) 1077 if (isign.eq.-1) mu = 4 - mu 1078 ss = 1.0_dp 1079 if (mu.eq.3) ss = -1.0_dp 1080* 1081 do 480 ipass = mh+1 , m 1082 jstep = (n*inc) / (4*la) 1083 jstepl = jstep - ninc 1084 laincl = la * ink - ninc 1085* 1086* k=0 loop (no twiddle factors) 1087* ----------------------------- 1088 do 430 ll = 0 , (la-1)*ink , 4*jstep 1089* 1090 do 420 jjj = ll , (n-1)*inc , 4*la*ink 1091 ja = istart + jjj 1092* 1093* "transverse" loop 1094* ----------------- 1095 do 415 nu = 1 , inq 1096 jb = ja + jstepl 1097 if (jb.lt.istart) jb = jb + ninc 1098 jc = jb + jstepl 1099 if (jc.lt.istart) jc = jc + ninc 1100 jd = jc + jstepl 1101 if (jd.lt.istart) jd = jd + ninc 1102 je = ja + laincl 1103 if (je.lt.istart) je = je + ninc 1104 jf = je + jstepl 1105 if (jf.lt.istart) jf = jf + ninc 1106 jg = jf + jstepl 1107 if (jg.lt.istart) jg = jg + ninc 1108 jh = jg + jstepl 1109 if (jh.lt.istart) jh = jh + ninc 1110 ji = je + laincl 1111 if (ji.lt.istart) ji = ji + ninc 1112 jj = ji + jstepl 1113 if (jj.lt.istart) jj = jj + ninc 1114 jk = jj + jstepl 1115 if (jk.lt.istart) jk = jk + ninc 1116 jl = jk + jstepl 1117 if (jl.lt.istart) jl = jl + ninc 1118 jm = ji + laincl 1119 if (jm.lt.istart) jm = jm + ninc 1120 jn = jm + jstepl 1121 if (jn.lt.istart) jn = jn + ninc 1122 jo = jn + jstepl 1123 if (jo.lt.istart) jo = jo + ninc 1124 jp = jo + jstepl 1125 if (jp.lt.istart) jp = jp + ninc 1126 j = 0 1127* 1128* loop across transforms 1129* ---------------------- 1130#ifdef OLD_CRAY 1131cdir$ ivdep, shortloop 1132#endif 1133 do 410 l = 1 , nvex 1134 aja = a(ja+j) 1135 ajc = a(jc+j) 1136 t0 = aja + ajc 1137 t2 = aja - ajc 1138 ajb = a(jb+j) 1139 ajd = a(jd+j) 1140 t1 = ajb + ajd 1141 t3 = ss * ( ajb - ajd ) 1142 aji = a(ji+j) 1143 ajc = aji 1144 bja = b(ja+j) 1145 bjc = b(jc+j) 1146 u0 = bja + bjc 1147 u2 = bja - bjc 1148 bjb = b(jb+j) 1149 bjd = b(jd+j) 1150 u1 = bjb + bjd 1151 u3 = ss * ( bjb - bjd ) 1152 aje = a(je+j) 1153 ajb = aje 1154 a(ja+j) = t0 + t1 1155 a(ji+j) = t0 - t1 1156 b(ja+j) = u0 + u1 1157 bjc = u0 - u1 1158 bjm = b(jm+j) 1159 bjd = bjm 1160 a(je+j) = t2 - u3 1161 ajd = t2 + u3 1162 bjb = u2 + t3 1163 b(jm+j) = u2 - t3 1164*---------------------- 1165 ajg = a(jg+j) 1166 t0 = ajb + ajg 1167 t2 = ajb - ajg 1168 ajf = a(jf+j) 1169 ajh = a(jh+j) 1170 t1 = ajf + ajh 1171 t3 = ss * ( ajf - ajh ) 1172 ajj = a(jj+j) 1173 ajg = ajj 1174 bje = b(je+j) 1175 bjg = b(jg+j) 1176 u0 = bje + bjg 1177 u2 = bje - bjg 1178 bjf = b(jf+j) 1179 bjh = b(jh+j) 1180 u1 = bjf + bjh 1181 u3 = ss * ( bjf - bjh ) 1182 b(je+j) = bjb 1183 a(jb+j) = t0 + t1 1184 a(jj+j) = t0 - t1 1185 bjj = b(jj+j) 1186 bjg = bjj 1187 b(jb+j) = u0 + u1 1188 b(jj+j) = u0 - u1 1189 a(jf+j) = t2 - u3 1190 ajh = t2 + u3 1191 b(jf+j) = u2 + t3 1192 bjh = u2 - t3 1193*---------------------- 1194 ajk = a(jk+j) 1195 t0 = ajc + ajk 1196 t2 = ajc - ajk 1197 ajl = a(jl+j) 1198 t1 = ajg + ajl 1199 t3 = ss * ( ajg - ajl ) 1200 bji = b(ji+j) 1201 bjk = b(jk+j) 1202 u0 = bji + bjk 1203 u2 = bji - bjk 1204 ajo = a(jo+j) 1205 ajl = ajo 1206 bjl = b(jl+j) 1207 u1 = bjg + bjl 1208 u3 = ss * ( bjg - bjl ) 1209 b(ji+j) = bjc 1210 a(jc+j) = t0 + t1 1211 a(jk+j) = t0 - t1 1212 bjo = b(jo+j) 1213 bjl = bjo 1214 b(jc+j) = u0 + u1 1215 b(jk+j) = u0 - u1 1216 a(jg+j) = t2 - u3 1217 a(jo+j) = t2 + u3 1218 b(jg+j) = u2 + t3 1219 b(jo+j) = u2 - t3 1220*---------------------- 1221 ajm = a(jm+j) 1222 t0 = ajm + ajl 1223 t2 = ajm - ajl 1224 ajn = a(jn+j) 1225 ajp = a(jp+j) 1226 t1 = ajn + ajp 1227 t3 = ss * ( ajn - ajp ) 1228 a(jm+j) = ajd 1229 u0 = bjd + bjl 1230 u2 = bjd - bjl 1231 bjn = b(jn+j) 1232 bjp = b(jp+j) 1233 u1 = bjn + bjp 1234 u3 = ss * ( bjn - bjp ) 1235 a(jn+j) = ajh 1236 a(jd+j) = t0 + t1 1237 a(jl+j) = t0 - t1 1238 b(jd+j) = u0 + u1 1239 b(jl+j) = u0 - u1 1240 b(jn+j) = bjh 1241 a(jh+j) = t2 - u3 1242 a(jp+j) = t2 + u3 1243 b(jh+j) = u2 + t3 1244 b(jp+j) = u2 - t3 1245 j = j + jump 1246 410 continue 1247*-----( end of loop across transforms ) 1248 ja = ja + jstepx 1249 if (ja.lt.istart) ja = ja + ninc 1250 415 continue 1251 420 continue 1252 430 continue 1253*-----( end of double loop for k=0 ) 1254* 1255* finished if last pass 1256* --------------------- 1257 if (ipass.eq.m) go to 490 1258* 1259 kk = 2*la 1260* 1261* loop on nonzero k 1262* ----------------- 1263 do 470 k = ink , jstep-ink , ink 1264 co1 = trigs(kk+1) 1265 si1 = s*trigs(kk+2) 1266 co2 = trigs(2*kk+1) 1267 si2 = s*trigs(2*kk+2) 1268 co3 = trigs(3*kk+1) 1269 si3 = s*trigs(3*kk+2) 1270* 1271* double loop along first transform in block 1272* ------------------------------------------ 1273 do 460 ll = k , (la-1)*ink , 4*jstep 1274* 1275 do 450 jjj = ll , (n-1)*inc , 4*la*ink 1276 ja = istart + jjj 1277* 1278* "transverse" loop 1279* ----------------- 1280 do 445 nu = 1 , inq 1281 jb = ja + jstepl 1282 if (jb.lt.istart) jb = jb + ninc 1283 jc = jb + jstepl 1284 if (jc.lt.istart) jc = jc + ninc 1285 jd = jc + jstepl 1286 if (jd.lt.istart) jd = jd + ninc 1287 je = ja + laincl 1288 if (je.lt.istart) je = je + ninc 1289 jf = je + jstepl 1290 if (jf.lt.istart) jf = jf + ninc 1291 jg = jf + jstepl 1292 if (jg.lt.istart) jg = jg + ninc 1293 jh = jg + jstepl 1294 if (jh.lt.istart) jh = jh + ninc 1295 ji = je + laincl 1296 if (ji.lt.istart) ji = ji + ninc 1297 jj = ji + jstepl 1298 if (jj.lt.istart) jj = jj + ninc 1299 jk = jj + jstepl 1300 if (jk.lt.istart) jk = jk + ninc 1301 jl = jk + jstepl 1302 if (jl.lt.istart) jl = jl + ninc 1303 jm = ji + laincl 1304 if (jm.lt.istart) jm = jm + ninc 1305 jn = jm + jstepl 1306 if (jn.lt.istart) jn = jn + ninc 1307 jo = jn + jstepl 1308 if (jo.lt.istart) jo = jo + ninc 1309 jp = jo + jstepl 1310 if (jp.lt.istart) jp = jp + ninc 1311 j = 0 1312* 1313* loop across transforms 1314* ---------------------- 1315#ifdef OLD_CRAY 1316cdir$ ivdep, shortloop 1317#endif 1318 do 440 l = 1 , nvex 1319 aja = a(ja+j) 1320 ajc = a(jc+j) 1321 t0 = aja + ajc 1322 t2 = aja - ajc 1323 ajb = a(jb+j) 1324 ajd = a(jd+j) 1325 t1 = ajb + ajd 1326 t3 = ss * ( ajb - ajd ) 1327 aji = a(ji+j) 1328 ajc = aji 1329 bja = b(ja+j) 1330 bjc = b(jc+j) 1331 u0 = bja + bjc 1332 u2 = bja - bjc 1333 bjb = b(jb+j) 1334 bjd = b(jd+j) 1335 u1 = bjb + bjd 1336 u3 = ss * ( bjb - bjd ) 1337 aje = a(je+j) 1338 ajb = aje 1339 a(ja+j) = t0 + t1 1340 b(ja+j) = u0 + u1 1341 a(je+j) = co1*(t2-u3) - si1*(u2+t3) 1342 bjb = si1*(t2-u3) + co1*(u2+t3) 1343 bjm = b(jm+j) 1344 bjd = bjm 1345 a(ji+j) = co2*(t0-t1) - si2*(u0-u1) 1346 bjc = si2*(t0-t1) + co2*(u0-u1) 1347 ajd = co3*(t2+u3) - si3*(u2-t3) 1348 b(jm+j) = si3*(t2+u3) + co3*(u2-t3) 1349*---------------------------------------- 1350 ajg = a(jg+j) 1351 t0 = ajb + ajg 1352 t2 = ajb - ajg 1353 ajf = a(jf+j) 1354 ajh = a(jh+j) 1355 t1 = ajf + ajh 1356 t3 = ss * ( ajf - ajh ) 1357 ajj = a(jj+j) 1358 ajg = ajj 1359 bje = b(je+j) 1360 bjg = b(jg+j) 1361 u0 = bje + bjg 1362 u2 = bje - bjg 1363 bjf = b(jf+j) 1364 bjh = b(jh+j) 1365 u1 = bjf + bjh 1366 u3 = ss * ( bjf - bjh ) 1367 b(je+j) = bjb 1368 a(jb+j) = t0 + t1 1369 b(jb+j) = u0 + u1 1370 bjj = b(jj+j) 1371 bjg = bjj 1372 a(jf+j) = co1*(t2-u3) - si1*(u2+t3) 1373 b(jf+j) = si1*(t2-u3) + co1*(u2+t3) 1374 a(jj+j) = co2*(t0-t1) - si2*(u0-u1) 1375 b(jj+j) = si2*(t0-t1) + co2*(u0-u1) 1376 ajh = co3*(t2+u3) - si3*(u2-t3) 1377 bjh = si3*(t2+u3) + co3*(u2-t3) 1378*---------------------------------------- 1379 ajk = a(jk+j) 1380 t0 = ajc + ajk 1381 t2 = ajc - ajk 1382 ajl = a(jl+j) 1383 t1 = ajg + ajl 1384 t3 = ss * ( ajg - ajl ) 1385 bji = b(ji+j) 1386 bjk = b(jk+j) 1387 u0 = bji + bjk 1388 u2 = bji - bjk 1389 ajo = a(jo+j) 1390 ajl = ajo 1391 bjl = b(jl+j) 1392 u1 = bjg + bjl 1393 u3 = ss * ( bjg - bjl ) 1394 b(ji+j) = bjc 1395 a(jc+j) = t0 + t1 1396 b(jc+j) = u0 + u1 1397 bjo = b(jo+j) 1398 bjl = bjo 1399 a(jg+j) = co1*(t2-u3) - si1*(u2+t3) 1400 b(jg+j) = si1*(t2-u3) + co1*(u2+t3) 1401 a(jk+j) = co2*(t0-t1) - si2*(u0-u1) 1402 b(jk+j) = si2*(t0-t1) + co2*(u0-u1) 1403 a(jo+j) = co3*(t2+u3) - si3*(u2-t3) 1404 b(jo+j) = si3*(t2+u3) + co3*(u2-t3) 1405*---------------------------------------- 1406 ajm = a(jm+j) 1407 t0 = ajm + ajl 1408 t2 = ajm - ajl 1409 ajn = a(jn+j) 1410 ajp = a(jp+j) 1411 t1 = ajn + ajp 1412 t3 = ss * ( ajn - ajp ) 1413 a(jm+j) = ajd 1414 u0 = bjd + bjl 1415 u2 = bjd - bjl 1416 a(jn+j) = ajh 1417 bjn = b(jn+j) 1418 bjp = b(jp+j) 1419 u1 = bjn + bjp 1420 u3 = ss * ( bjn - bjp ) 1421 b(jn+j) = bjh 1422 a(jd+j) = t0 + t1 1423 b(jd+j) = u0 + u1 1424 a(jh+j) = co1*(t2-u3) - si1*(u2+t3) 1425 b(jh+j) = si1*(t2-u3) + co1*(u2+t3) 1426 a(jl+j) = co2*(t0-t1) - si2*(u0-u1) 1427 b(jl+j) = si2*(t0-t1) + co2*(u0-u1) 1428 a(jp+j) = co3*(t2+u3) - si3*(u2-t3) 1429 b(jp+j) = si3*(t2+u3) + co3*(u2-t3) 1430 j = j + jump 1431 440 continue 1432*-----(end of loop across transforms) 1433 ja = ja + jstepx 1434 if (ja.lt.istart) ja = ja + ninc 1435 445 continue 1436 450 continue 1437 460 continue 1438*-----( end of double loop for this k ) 1439 kk = kk + 2*la 1440 470 continue 1441*-----( end of loop over values of k ) 1442 la = 4*la 1443 480 continue 1444*-----( end of loop on type II radix-4 passes ) 1445*-----( nvex transforms completed) 1446 490 continue 1447 istart = istart + nvex * jump 1448 500 continue 1449*-----( end of loop on blocks of transforms ) 1450* 1451 return 1452 end subroutine gpfa2f 1453 1454!****************************************************************** 1455 1456* fortran version of *gpfa3* - 1457* radix-3 section of self-sorting, in-place 1458* generalized PFA 1459* 1460*------------------------------------------------------------------- 1461* 1462 subroutine gpfa3f(a,b,trigs,inc,jump,n,mm,lot,isign) 1463 1464 real(dp) :: a(*), b(*) 1465 real(dp) :: trigs(*) 1466 integer inc, jump, n, mm, lot, isign 1467 1468 real(dp), parameter :: sin60 = 0.866025403784437_dp 1469 1470 integer :: je, jf, jg, jh, laincl, ll, ji 1471 integer :: n3, inq, jstepx, ninc, ink, mu, m, mh, 1472 $ nblox, left, istart, nb, nvex, la, 1473 $ ipass, jstep, jstepl, jjj, ja, nu, jb, 1474 $ jc, j, l, kk, k, jd 1475 1476 real(dp) :: s, ajb, ajc, t1, aja, t2, t3, bjb, bjc, 1477 $ u1, bja, u2, u3, co1, si1, co2, si2, ajd, 1478 $ bjd 1479 real(dp) :: c1, aje, ajg, ajf, ajh, bje, 1480 $ bjf, bjg, bjh, aji, bji 1481 1482 1483 integer, parameter :: lvr = 128 1484* 1485* *************************************************************** 1486* * * 1487* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. * 1488* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE * 1489* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. * 1490* * * 1491* *************************************************************** 1492* 1493 n3 = 3**mm 1494 inq = n/n3 1495 jstepx = (n3-n) * inc 1496 ninc = n * inc 1497 ink = inc * inq 1498 mu = mod(inq,3) 1499 if (isign.eq.-1) mu = 3-mu 1500 m = mm 1501 mh = (m+1)/2 1502 s = real(isign,kind=dp) 1503 c1 = sin60 1504 if (mu.eq.2) c1 = -c1 1505* 1506 nblox = 1 + (lot-1)/lvr 1507 left = lot 1508 s = real(isign,kind=dp) 1509 istart = 1 1510* 1511* loop on blocks of lvr transforms 1512* -------------------------------- 1513 do 500 nb = 1 , nblox 1514* 1515 if (left.le.lvr) then 1516 nvex = left 1517 else if (left.lt.(2*lvr)) then 1518 nvex = left/2 1519 nvex = nvex + mod(nvex,2) 1520 else 1521 nvex = lvr 1522 endif 1523 left = left - nvex 1524* 1525 la = 1 1526* 1527* loop on type I radix-3 passes 1528* ----------------------------- 1529 do 160 ipass = 1 , mh 1530 jstep = (n*inc) / (3*la) 1531 jstepl = jstep - ninc 1532* 1533* k = 0 loop (no twiddle factors) 1534* ------------------------------- 1535 do 120 jjj = 0 , (n-1)*inc , 3*jstep 1536 ja = istart + jjj 1537* 1538* "transverse" loop 1539* ----------------- 1540 do 115 nu = 1 , inq 1541 jb = ja + jstepl 1542 if (jb.lt.istart) jb = jb + ninc 1543 jc = jb + jstepl 1544 if (jc.lt.istart) jc = jc + ninc 1545 j = 0 1546* 1547* loop across transforms 1548* ---------------------- 1549#ifdef OLD_CRAY 1550cdir$ ivdep, shortloop 1551#endif 1552 do 110 l = 1 , nvex 1553 ajb = a(jb+j) 1554 ajc = a(jc+j) 1555 t1 = ajb + ajc 1556 aja = a(ja+j) 1557 t2 = aja - 0.5_dp * t1 1558 t3 = c1 * ( ajb - ajc ) 1559 bjb = b(jb+j) 1560 bjc = b(jc+j) 1561 u1 = bjb + bjc 1562 bja = b(ja+j) 1563 u2 = bja - 0.5_dp * u1 1564 u3 = c1 * ( bjb - bjc ) 1565 a(ja+j) = aja + t1 1566 b(ja+j) = bja + u1 1567 a(jb+j) = t2 - u3 1568 b(jb+j) = u2 + t3 1569 a(jc+j) = t2 + u3 1570 b(jc+j) = u2 - t3 1571 j = j + jump 1572 110 continue 1573 ja = ja + jstepx 1574 if (ja.lt.istart) ja = ja + ninc 1575 115 continue 1576 120 continue 1577* 1578* finished if n3 = 3 1579* ------------------ 1580 if (n3.eq.3) go to 490 1581 kk = 2 * la 1582* 1583* loop on nonzero k 1584* ----------------- 1585 do 150 k = ink , jstep-ink , ink 1586 co1 = trigs(kk+1) 1587 si1 = s*trigs(kk+2) 1588 co2 = trigs(2*kk+1) 1589 si2 = s*trigs(2*kk+2) 1590* 1591* loop along transform 1592* -------------------- 1593 do 140 jjj = k , (n-1)*inc , 3*jstep 1594 ja = istart + jjj 1595* 1596* "transverse" loop 1597* ----------------- 1598 do 135 nu = 1 , inq 1599 jb = ja + jstepl 1600 if (jb.lt.istart) jb = jb + ninc 1601 jc = jb + jstepl 1602 if (jc.lt.istart) jc = jc + ninc 1603 j = 0 1604* 1605* loop across transforms 1606* ---------------------- 1607#ifdef OLD_CRAY 1608cdir$ ivdep, shortloop 1609#endif 1610 do 130 l = 1 , nvex 1611 ajb = a(jb+j) 1612 ajc = a(jc+j) 1613 t1 = ajb + ajc 1614 aja = a(ja+j) 1615 t2 = aja - 0.5_dp * t1 1616 t3 = c1 * ( ajb - ajc ) 1617 bjb = b(jb+j) 1618 bjc = b(jc+j) 1619 u1 = bjb + bjc 1620 bja = b(ja+j) 1621 u2 = bja - 0.5_dp * u1 1622 u3 = c1 * ( bjb - bjc ) 1623 a(ja+j) = aja + t1 1624 b(ja+j) = bja + u1 1625 a(jb+j) = co1*(t2-u3) - si1*(u2+t3) 1626 b(jb+j) = si1*(t2-u3) + co1*(u2+t3) 1627 a(jc+j) = co2*(t2+u3) - si2*(u2-t3) 1628 b(jc+j) = si2*(t2+u3) + co2*(u2-t3) 1629 j = j + jump 1630 130 continue 1631*-----( end of loop across transforms ) 1632 ja = ja + jstepx 1633 if (ja.lt.istart) ja = ja + ninc 1634 135 continue 1635 140 continue 1636*-----( end of loop along transforms ) 1637 kk = kk + 2*la 1638 150 continue 1639*-----( end of loop on nonzero k ) 1640 la = 3*la 1641 160 continue 1642*-----( end of loop on type I radix-3 passes) 1643* 1644* loop on type II radix-3 passes 1645* ------------------------------ 1646 400 continue 1647* 1648 do 480 ipass = mh+1 , m 1649 jstep = (n*inc) / (3*la) 1650 jstepl = jstep - ninc 1651 laincl = la*ink - ninc 1652* 1653* k=0 loop (no twiddle factors) 1654* ----------------------------- 1655 do 430 ll = 0 , (la-1)*ink , 3*jstep 1656* 1657 do 420 jjj = ll , (n-1)*inc , 3*la*ink 1658 ja = istart + jjj 1659* 1660* "transverse" loop 1661* ----------------- 1662 do 415 nu = 1 , inq 1663 jb = ja + jstepl 1664 if (jb.lt.istart) jb = jb + ninc 1665 jc = jb + jstepl 1666 if (jc.lt.istart) jc = jc + ninc 1667 jd = ja + laincl 1668 if (jd.lt.istart) jd = jd + ninc 1669 je = jd + jstepl 1670 if (je.lt.istart) je = je + ninc 1671 jf = je + jstepl 1672 if (jf.lt.istart) jf = jf + ninc 1673 jg = jd + laincl 1674 if (jg.lt.istart) jg = jg + ninc 1675 jh = jg + jstepl 1676 if (jh.lt.istart) jh = jh + ninc 1677 ji = jh + jstepl 1678 if (ji.lt.istart) ji = ji + ninc 1679 j = 0 1680* 1681* loop across transforms 1682* ---------------------- 1683#ifdef OLD_CRAY 1684cdir$ ivdep, shortloop 1685#endif 1686 do 410 l = 1 , nvex 1687 ajb = a(jb+j) 1688 ajc = a(jc+j) 1689 t1 = ajb + ajc 1690 aja = a(ja+j) 1691 t2 = aja - 0.5_dp * t1 1692 t3 = c1 * ( ajb - ajc ) 1693 ajd = a(jd+j) 1694 ajb = ajd 1695 bjb = b(jb+j) 1696 bjc = b(jc+j) 1697 u1 = bjb + bjc 1698 bja = b(ja+j) 1699 u2 = bja - 0.5_dp * u1 1700 u3 = c1 * ( bjb - bjc ) 1701 bjd = b(jd+j) 1702 bjb = bjd 1703 a(ja+j) = aja + t1 1704 b(ja+j) = bja + u1 1705 a(jd+j) = t2 - u3 1706 b(jd+j) = u2 + t3 1707 ajc = t2 + u3 1708 bjc = u2 - t3 1709*---------------------- 1710 aje = a(je+j) 1711 ajf = a(jf+j) 1712 t1 = aje + ajf 1713 t2 = ajb - 0.5_dp * t1 1714 t3 = c1 * ( aje - ajf ) 1715 ajh = a(jh+j) 1716 ajf = ajh 1717 bje = b(je+j) 1718 bjf = b(jf+j) 1719 u1 = bje + bjf 1720 u2 = bjb - 0.5_dp * u1 1721 u3 = c1 * ( bje - bjf ) 1722 bjh = b(jh+j) 1723 bjf = bjh 1724 a(jb+j) = ajb + t1 1725 b(jb+j) = bjb + u1 1726 a(je+j) = t2 - u3 1727 b(je+j) = u2 + t3 1728 a(jh+j) = t2 + u3 1729 b(jh+j) = u2 - t3 1730*---------------------- 1731 aji = a(ji+j) 1732 t1 = ajf + aji 1733 ajg = a(jg+j) 1734 t2 = ajg - 0.5_dp * t1 1735 t3 = c1 * ( ajf - aji ) 1736 t1 = ajg + t1 1737 a(jg+j) = ajc 1738 bji = b(ji+j) 1739 u1 = bjf + bji 1740 bjg = b(jg+j) 1741 u2 = bjg - 0.5_dp * u1 1742 u3 = c1 * ( bjf - bji ) 1743 u1 = bjg + u1 1744 b(jg+j) = bjc 1745 a(jc+j) = t1 1746 b(jc+j) = u1 1747 a(jf+j) = t2 - u3 1748 b(jf+j) = u2 + t3 1749 a(ji+j) = t2 + u3 1750 b(ji+j) = u2 - t3 1751 j = j + jump 1752 410 continue 1753*-----( end of loop across transforms ) 1754 ja = ja + jstepx 1755 if (ja.lt.istart) ja = ja + ninc 1756 415 continue 1757 420 continue 1758 430 continue 1759*-----( end of double loop for k=0 ) 1760* 1761* finished if last pass 1762* --------------------- 1763 if (ipass.eq.m) go to 490 1764* 1765 kk = 2*la 1766* 1767* loop on nonzero k 1768* ----------------- 1769 do 470 k = ink , jstep-ink , ink 1770 co1 = trigs(kk+1) 1771 si1 = s*trigs(kk+2) 1772 co2 = trigs(2*kk+1) 1773 si2 = s*trigs(2*kk+2) 1774* 1775* double loop along first transform in block 1776* ------------------------------------------ 1777 do 460 ll = k , (la-1)*ink , 3*jstep 1778* 1779 do 450 jjj = ll , (n-1)*inc , 3*la*ink 1780 ja = istart + jjj 1781* 1782* "transverse" loop 1783* ----------------- 1784 do 445 nu = 1 , inq 1785 jb = ja + jstepl 1786 if (jb.lt.istart) jb = jb + ninc 1787 jc = jb + jstepl 1788 if (jc.lt.istart) jc = jc + ninc 1789 jd = ja + laincl 1790 if (jd.lt.istart) jd = jd + ninc 1791 je = jd + jstepl 1792 if (je.lt.istart) je = je + ninc 1793 jf = je + jstepl 1794 if (jf.lt.istart) jf = jf + ninc 1795 jg = jd + laincl 1796 if (jg.lt.istart) jg = jg + ninc 1797 jh = jg + jstepl 1798 if (jh.lt.istart) jh = jh + ninc 1799 ji = jh + jstepl 1800 if (ji.lt.istart) ji = ji + ninc 1801 j = 0 1802* 1803* loop across transforms 1804* ---------------------- 1805#ifdef OLD_CRAY 1806cdir$ ivdep, shortloop 1807#endif 1808 do 440 l = 1 , nvex 1809 ajb = a(jb+j) 1810 ajc = a(jc+j) 1811 t1 = ajb + ajc 1812 aja = a(ja+j) 1813 t2 = aja - 0.5_dp * t1 1814 t3 = c1 * ( ajb - ajc ) 1815 ajd = a(jd+j) 1816 ajb = ajd 1817 bjb = b(jb+j) 1818 bjc = b(jc+j) 1819 u1 = bjb + bjc 1820 bja = b(ja+j) 1821 u2 = bja - 0.5_dp * u1 1822 u3 = c1 * ( bjb - bjc ) 1823 bjd = b(jd+j) 1824 bjb = bjd 1825 a(ja+j) = aja + t1 1826 b(ja+j) = bja + u1 1827 a(jd+j) = co1*(t2-u3) - si1*(u2+t3) 1828 b(jd+j) = si1*(t2-u3) + co1*(u2+t3) 1829 ajc = co2*(t2+u3) - si2*(u2-t3) 1830 bjc = si2*(t2+u3) + co2*(u2-t3) 1831*---------------------- 1832 aje = a(je+j) 1833 ajf = a(jf+j) 1834 t1 = aje + ajf 1835 t2 = ajb - 0.5_dp * t1 1836 t3 = c1 * ( aje - ajf ) 1837 ajh = a(jh+j) 1838 ajf = ajh 1839 bje = b(je+j) 1840 bjf = b(jf+j) 1841 u1 = bje + bjf 1842 u2 = bjb - 0.5_dp * u1 1843 u3 = c1 * ( bje - bjf ) 1844 bjh = b(jh+j) 1845 bjf = bjh 1846 a(jb+j) = ajb + t1 1847 b(jb+j) = bjb + u1 1848 a(je+j) = co1*(t2-u3) - si1*(u2+t3) 1849 b(je+j) = si1*(t2-u3) + co1*(u2+t3) 1850 a(jh+j) = co2*(t2+u3) - si2*(u2-t3) 1851 b(jh+j) = si2*(t2+u3) + co2*(u2-t3) 1852*---------------------- 1853 aji = a(ji+j) 1854 t1 = ajf + aji 1855 ajg = a(jg+j) 1856 t2 = ajg - 0.5_dp * t1 1857 t3 = c1 * ( ajf - aji ) 1858 t1 = ajg + t1 1859 a(jg+j) = ajc 1860 bji = b(ji+j) 1861 u1 = bjf + bji 1862 bjg = b(jg+j) 1863 u2 = bjg - 0.5_dp * u1 1864 u3 = c1 * ( bjf - bji ) 1865 u1 = bjg + u1 1866 b(jg+j) = bjc 1867 a(jc+j) = t1 1868 b(jc+j) = u1 1869 a(jf+j) = co1*(t2-u3) - si1*(u2+t3) 1870 b(jf+j) = si1*(t2-u3) + co1*(u2+t3) 1871 a(ji+j) = co2*(t2+u3) - si2*(u2-t3) 1872 b(ji+j) = si2*(t2+u3) + co2*(u2-t3) 1873 j = j + jump 1874 440 continue 1875*-----(end of loop across transforms) 1876 ja = ja + jstepx 1877 if (ja.lt.istart) ja = ja + ninc 1878 445 continue 1879 450 continue 1880 460 continue 1881*-----( end of double loop for this k ) 1882 kk = kk + 2*la 1883 470 continue 1884*-----( end of loop over values of k ) 1885 la = 3*la 1886 480 continue 1887*-----( end of loop on type II radix-3 passes ) 1888*-----( nvex transforms completed) 1889 490 continue 1890 istart = istart + nvex * jump 1891 500 continue 1892*-----( end of loop on blocks of transforms ) 1893* 1894 return 1895 end subroutine gpfa3f 1896 1897!******************************************************************* 1898 1899* fortran version of *gpfa5* - 1900* radix-5 section of self-sorting, in-place, 1901* generalized pfa 1902* 1903*------------------------------------------------------------------- 1904* 1905 subroutine gpfa5f(a,b,trigs,inc,jump,n,mm,lot,isign) 1906 1907 real(dp) :: a(*), b(*) 1908 real(dp) :: trigs(*) 1909 integer inc, jump, n, mm, lot, isign 1910 1911 real(dp), parameter :: sin36 = 0.587785252292473_dp, 1912 $ sin72 = 0.951056516295154_dp, 1913 $ qrt5 = 0.559016994374947_dp 1914 integer, parameter :: lvr = 128 1915 1916 integer :: inq, jstepx, ninc, ink, mu, m, mh, 1917 $ nblox, n5, left, istart, nb, nvex, la, 1918 $ ipass, jstep, jstepl, kk, k, jjj, ja, 1919 $ nu, jb, jc, jd, je, j, laincl, ll, l, 1920 $ jf, jg, jh, ji, jj, jk, jl, jm, jn, jo, 1921 $ jp, jq, jr, js, jt, ju, jv, jw, jx, jy 1922 1923 real(dp) :: s, c1, c2, c3, co1, si1, co2, si2, co3, 1924 $ si3, co4, si4, ajb, aje, t1, ajc, ajd, 1925 $ t2, t3, t4, t5, t6, aja, t7, t8, t9, t10, 1926 $ t11, bjb, bje, u1, bjc, bjd, u2, u3, u4, 1927 $ u5, u6, bja, u7, u8, u9, u10, u11, ajf, 1928 $ ajk, bjf, bjk, ajg, ajj, ajh, aji, ajl, 1929 $ ajq, bjg, bjj, bjh, bji, bjl, bjq, ajo, 1930 $ ajm, ajn, ajr, ajw, bjo, bjm, bjn, bjr, 1931 $ bjw, ajt, ajs, ajx, ajp, ax, bjt, bjs, 1932 $ bjx, bjp, bx, ajv, ajy, aju, bjv, bjy, 1933 $ bju 1934* 1935* *************************************************************** 1936* * * 1937* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. * 1938* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE * 1939* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. * 1940* * * 1941* *************************************************************** 1942* 1943 n5 = 5 ** mm 1944 inq = n / n5 1945 jstepx = (n5-n) * inc 1946 ninc = n * inc 1947 ink = inc * inq 1948 mu = mod(inq,5) 1949 if (isign.eq.-1) mu = 5 - mu 1950* 1951 m = mm 1952 mh = (m+1)/2 1953 s = real(isign,kind=dp) 1954 c1 = qrt5 1955 c2 = sin72 1956 c3 = sin36 1957 if (mu.eq.2.or.mu.eq.3) then 1958 c1 = -c1 1959 c2 = sin36 1960 c3 = sin72 1961 endif 1962 if (mu.eq.3.or.mu.eq.4) c2 = -c2 1963 if (mu.eq.2.or.mu.eq.4) c3 = -c3 1964* 1965 nblox = 1 + (lot-1)/lvr 1966 left = lot 1967 s = real(isign,kind=dp) 1968 istart = 1 1969* 1970* loop on blocks of lvr transforms 1971* -------------------------------- 1972 do 500 nb = 1 , nblox 1973* 1974 if (left.le.lvr) then 1975 nvex = left 1976 else if (left.lt.(2*lvr)) then 1977 nvex = left/2 1978 nvex = nvex + mod(nvex,2) 1979 else 1980 nvex = lvr 1981 endif 1982 left = left - nvex 1983* 1984 la = 1 1985* 1986* loop on type I radix-5 passes 1987* ----------------------------- 1988 do 160 ipass = 1 , mh 1989 jstep = (n*inc) / (5*la) 1990 jstepl = jstep - ninc 1991 kk = 0 1992* 1993* loop on k 1994* --------- 1995 do 150 k = 0 , jstep-ink , ink 1996* 1997 if (k.gt.0) then 1998 co1 = trigs(kk+1) 1999 si1 = s*trigs(kk+2) 2000 co2 = trigs(2*kk+1) 2001 si2 = s*trigs(2*kk+2) 2002 co3 = trigs(3*kk+1) 2003 si3 = s*trigs(3*kk+2) 2004 co4 = trigs(4*kk+1) 2005 si4 = s*trigs(4*kk+2) 2006 endif 2007* 2008* loop along transform 2009* -------------------- 2010 do 140 jjj = k , (n-1)*inc , 5*jstep 2011 ja = istart + jjj 2012* 2013* "transverse" loop 2014* ----------------- 2015 do 135 nu = 1 , inq 2016 jb = ja + jstepl 2017 if (jb.lt.istart) jb = jb + ninc 2018 jc = jb + jstepl 2019 if (jc.lt.istart) jc = jc + ninc 2020 jd = jc + jstepl 2021 if (jd.lt.istart) jd = jd + ninc 2022 je = jd + jstepl 2023 if (je.lt.istart) je = je + ninc 2024 j = 0 2025* 2026* loop across transforms 2027* ---------------------- 2028 if (k.eq.0) then 2029* 2030#ifdef OLD_CRAY 2031cdir$ ivdep, shortloop 2032#endif 2033 do 110 l = 1 , nvex 2034 ajb = a(jb+j) 2035 aje = a(je+j) 2036 t1 = ajb + aje 2037 ajc = a(jc+j) 2038 ajd = a(jd+j) 2039 t2 = ajc + ajd 2040 t3 = ajb - aje 2041 t4 = ajc - ajd 2042 t5 = t1 + t2 2043 t6 = c1 * ( t1 - t2 ) 2044 aja = a(ja+j) 2045 t7 = aja - 0.25_dp * t5 2046 a(ja+j) = aja + t5 2047 t8 = t7 + t6 2048 t9 = t7 - t6 2049 t10 = c3 * t3 - c2 * t4 2050 t11 = c2 * t3 + c3 * t4 2051 bjb = b(jb+j) 2052 bje = b(je+j) 2053 u1 = bjb + bje 2054 bjc = b(jc+j) 2055 bjd = b(jd+j) 2056 u2 = bjc + bjd 2057 u3 = bjb - bje 2058 u4 = bjc - bjd 2059 u5 = u1 + u2 2060 u6 = c1 * ( u1 - u2 ) 2061 bja = b(ja+j) 2062 u7 = bja - 0.25_dp * u5 2063 b(ja+j) = bja + u5 2064 u8 = u7 + u6 2065 u9 = u7 - u6 2066 u10 = c3 * u3 - c2 * u4 2067 u11 = c2 * u3 + c3 * u4 2068 a(jb+j) = t8 - u11 2069 b(jb+j) = u8 + t11 2070 a(je+j) = t8 + u11 2071 b(je+j) = u8 - t11 2072 a(jc+j) = t9 - u10 2073 b(jc+j) = u9 + t10 2074 a(jd+j) = t9 + u10 2075 b(jd+j) = u9 - t10 2076 j = j + jump 2077 110 continue 2078* 2079 else 2080* 2081#ifdef OLD_CRAY 2082cdir$ ivdep, shortloop 2083#endif 2084 do 130 l = 1 , nvex 2085 ajb = a(jb+j) 2086 aje = a(je+j) 2087 t1 = ajb + aje 2088 ajc = a(jc+j) 2089 ajd = a(jd+j) 2090 t2 = ajc + ajd 2091 t3 = ajb - aje 2092 t4 = ajc - ajd 2093 t5 = t1 + t2 2094 t6 = c1 * ( t1 - t2 ) 2095 aja = a(ja+j) 2096 t7 = aja - 0.25_dp * t5 2097 a(ja+j) = aja + t5 2098 t8 = t7 + t6 2099 t9 = t7 - t6 2100 t10 = c3 * t3 - c2 * t4 2101 t11 = c2 * t3 + c3 * t4 2102 bjb = b(jb+j) 2103 bje = b(je+j) 2104 u1 = bjb + bje 2105 bjc = b(jc+j) 2106 bjd = b(jd+j) 2107 u2 = bjc + bjd 2108 u3 = bjb - bje 2109 u4 = bjc - bjd 2110 u5 = u1 + u2 2111 u6 = c1 * ( u1 - u2 ) 2112 bja = b(ja+j) 2113 u7 = bja - 0.25_dp * u5 2114 b(ja+j) = bja + u5 2115 u8 = u7 + u6 2116 u9 = u7 - u6 2117 u10 = c3 * u3 - c2 * u4 2118 u11 = c2 * u3 + c3 * u4 2119 a(jb+j) = co1*(t8-u11) - si1*(u8+t11) 2120 b(jb+j) = si1*(t8-u11) + co1*(u8+t11) 2121 a(je+j) = co4*(t8+u11) - si4*(u8-t11) 2122 b(je+j) = si4*(t8+u11) + co4*(u8-t11) 2123 a(jc+j) = co2*(t9-u10) - si2*(u9+t10) 2124 b(jc+j) = si2*(t9-u10) + co2*(u9+t10) 2125 a(jd+j) = co3*(t9+u10) - si3*(u9-t10) 2126 b(jd+j) = si3*(t9+u10) + co3*(u9-t10) 2127 j = j + jump 2128 130 continue 2129* 2130 endif 2131* 2132*-----( end of loop across transforms ) 2133* 2134 ja = ja + jstepx 2135 if (ja.lt.istart) ja = ja + ninc 2136 135 continue 2137 140 continue 2138*-----( end of loop along transforms ) 2139 kk = kk + 2*la 2140 150 continue 2141*-----( end of loop on nonzero k ) 2142 la = 5*la 2143 160 continue 2144*-----( end of loop on type I radix-5 passes) 2145* 2146 if (n.eq.5) go to 490 2147* 2148* loop on type II radix-5 passes 2149* ------------------------------ 2150 400 continue 2151* 2152 do 480 ipass = mh+1 , m 2153 jstep = (n*inc) / (5*la) 2154 jstepl = jstep - ninc 2155 laincl = la * ink - ninc 2156 kk = 0 2157* 2158* loop on k 2159* --------- 2160 do 470 k = 0 , jstep-ink , ink 2161* 2162 if (k.gt.0) then 2163 co1 = trigs(kk+1) 2164 si1 = s*trigs(kk+2) 2165 co2 = trigs(2*kk+1) 2166 si2 = s*trigs(2*kk+2) 2167 co3 = trigs(3*kk+1) 2168 si3 = s*trigs(3*kk+2) 2169 co4 = trigs(4*kk+1) 2170 si4 = s*trigs(4*kk+2) 2171 endif 2172* 2173* double loop along first transform in block 2174* ------------------------------------------ 2175 do 460 ll = k , (la-1)*ink , 5*jstep 2176* 2177 do 450 jjj = ll , (n-1)*inc , 5*la*ink 2178 ja = istart + jjj 2179* 2180* "transverse" loop 2181* ----------------- 2182 do 445 nu = 1 , inq 2183 jb = ja + jstepl 2184 if (jb.lt.istart) jb = jb + ninc 2185 jc = jb + jstepl 2186 if (jc.lt.istart) jc = jc + ninc 2187 jd = jc + jstepl 2188 if (jd.lt.istart) jd = jd + ninc 2189 je = jd + jstepl 2190 if (je.lt.istart) je = je + ninc 2191 jf = ja + laincl 2192 if (jf.lt.istart) jf = jf + ninc 2193 jg = jf + jstepl 2194 if (jg.lt.istart) jg = jg + ninc 2195 jh = jg + jstepl 2196 if (jh.lt.istart) jh = jh + ninc 2197 ji = jh + jstepl 2198 if (ji.lt.istart) ji = ji + ninc 2199 jj = ji + jstepl 2200 if (jj.lt.istart) jj = jj + ninc 2201 jk = jf + laincl 2202 if (jk.lt.istart) jk = jk + ninc 2203 jl = jk + jstepl 2204 if (jl.lt.istart) jl = jl + ninc 2205 jm = jl + jstepl 2206 if (jm.lt.istart) jm = jm + ninc 2207 jn = jm + jstepl 2208 if (jn.lt.istart) jn = jn + ninc 2209 jo = jn + jstepl 2210 if (jo.lt.istart) jo = jo + ninc 2211 jp = jk + laincl 2212 if (jp.lt.istart) jp = jp + ninc 2213 jq = jp + jstepl 2214 if (jq.lt.istart) jq = jq + ninc 2215 jr = jq + jstepl 2216 if (jr.lt.istart) jr = jr + ninc 2217 js = jr + jstepl 2218 if (js.lt.istart) js = js + ninc 2219 jt = js + jstepl 2220 if (jt.lt.istart) jt = jt + ninc 2221 ju = jp + laincl 2222 if (ju.lt.istart) ju = ju + ninc 2223 jv = ju + jstepl 2224 if (jv.lt.istart) jv = jv + ninc 2225 jw = jv + jstepl 2226 if (jw.lt.istart) jw = jw + ninc 2227 jx = jw + jstepl 2228 if (jx.lt.istart) jx = jx + ninc 2229 jy = jx + jstepl 2230 if (jy.lt.istart) jy = jy + ninc 2231 j = 0 2232* 2233* loop across transforms 2234* ---------------------- 2235 if (k.eq.0) then 2236* 2237#ifdef OLD_CRAY 2238cdir$ ivdep, shortloop 2239#endif 2240 do 410 l = 1 , nvex 2241 ajb = a(jb+j) 2242 aje = a(je+j) 2243 t1 = ajb + aje 2244 ajc = a(jc+j) 2245 ajd = a(jd+j) 2246 t2 = ajc + ajd 2247 t3 = ajb - aje 2248 t4 = ajc - ajd 2249 ajf = a(jf+j) 2250 ajb = ajf 2251 t5 = t1 + t2 2252 t6 = c1 * ( t1 - t2 ) 2253 aja = a(ja+j) 2254 t7 = aja - 0.25_dp * t5 2255 a(ja+j) = aja + t5 2256 t8 = t7 + t6 2257 t9 = t7 - t6 2258 ajk = a(jk+j) 2259 ajc = ajk 2260 t10 = c3 * t3 - c2 * t4 2261 t11 = c2 * t3 + c3 * t4 2262 bjb = b(jb+j) 2263 bje = b(je+j) 2264 u1 = bjb + bje 2265 bjc = b(jc+j) 2266 bjd = b(jd+j) 2267 u2 = bjc + bjd 2268 u3 = bjb - bje 2269 u4 = bjc - bjd 2270 bjf = b(jf+j) 2271 bjb = bjf 2272 u5 = u1 + u2 2273 u6 = c1 * ( u1 - u2 ) 2274 bja = b(ja+j) 2275 u7 = bja - 0.25_dp * u5 2276 b(ja+j) = bja + u5 2277 u8 = u7 + u6 2278 u9 = u7 - u6 2279 bjk = b(jk+j) 2280 bjc = bjk 2281 u10 = c3 * u3 - c2 * u4 2282 u11 = c2 * u3 + c3 * u4 2283 a(jf+j) = t8 - u11 2284 b(jf+j) = u8 + t11 2285 aje = t8 + u11 2286 bje = u8 - t11 2287 a(jk+j) = t9 - u10 2288 b(jk+j) = u9 + t10 2289 ajd = t9 + u10 2290 bjd = u9 - t10 2291*---------------------- 2292 ajg = a(jg+j) 2293 ajj = a(jj+j) 2294 t1 = ajg + ajj 2295 ajh = a(jh+j) 2296 aji = a(ji+j) 2297 t2 = ajh + aji 2298 t3 = ajg - ajj 2299 t4 = ajh - aji 2300 ajl = a(jl+j) 2301 ajh = ajl 2302 t5 = t1 + t2 2303 t6 = c1 * ( t1 - t2 ) 2304 t7 = ajb - 0.25_dp * t5 2305 a(jb+j) = ajb + t5 2306 t8 = t7 + t6 2307 t9 = t7 - t6 2308 ajq = a(jq+j) 2309 aji = ajq 2310 t10 = c3 * t3 - c2 * t4 2311 t11 = c2 * t3 + c3 * t4 2312 bjg = b(jg+j) 2313 bjj = b(jj+j) 2314 u1 = bjg + bjj 2315 bjh = b(jh+j) 2316 bji = b(ji+j) 2317 u2 = bjh + bji 2318 u3 = bjg - bjj 2319 u4 = bjh - bji 2320 bjl = b(jl+j) 2321 bjh = bjl 2322 u5 = u1 + u2 2323 u6 = c1 * ( u1 - u2 ) 2324 u7 = bjb - 0.25_dp * u5 2325 b(jb+j) = bjb + u5 2326 u8 = u7 + u6 2327 u9 = u7 - u6 2328 bjq = b(jq+j) 2329 bji = bjq 2330 u10 = c3 * u3 - c2 * u4 2331 u11 = c2 * u3 + c3 * u4 2332 a(jg+j) = t8 - u11 2333 b(jg+j) = u8 + t11 2334 ajj = t8 + u11 2335 bjj = u8 - t11 2336 a(jl+j) = t9 - u10 2337 b(jl+j) = u9 + t10 2338 a(jq+j) = t9 + u10 2339 b(jq+j) = u9 - t10 2340*---------------------- 2341 ajo = a(jo+j) 2342 t1 = ajh + ajo 2343 ajm = a(jm+j) 2344 ajn = a(jn+j) 2345 t2 = ajm + ajn 2346 t3 = ajh - ajo 2347 t4 = ajm - ajn 2348 ajr = a(jr+j) 2349 ajn = ajr 2350 t5 = t1 + t2 2351 t6 = c1 * ( t1 - t2 ) 2352 t7 = ajc - 0.25_dp * t5 2353 a(jc+j) = ajc + t5 2354 t8 = t7 + t6 2355 t9 = t7 - t6 2356 ajw = a(jw+j) 2357 ajo = ajw 2358 t10 = c3 * t3 - c2 * t4 2359 t11 = c2 * t3 + c3 * t4 2360 bjo = b(jo+j) 2361 u1 = bjh + bjo 2362 bjm = b(jm+j) 2363 bjn = b(jn+j) 2364 u2 = bjm + bjn 2365 u3 = bjh - bjo 2366 u4 = bjm - bjn 2367 bjr = b(jr+j) 2368 bjn = bjr 2369 u5 = u1 + u2 2370 u6 = c1 * ( u1 - u2 ) 2371 u7 = bjc - 0.25_dp * u5 2372 b(jc+j) = bjc + u5 2373 u8 = u7 + u6 2374 u9 = u7 - u6 2375 bjw = b(jw+j) 2376 bjo = bjw 2377 u10 = c3 * u3 - c2 * u4 2378 u11 = c2 * u3 + c3 * u4 2379 a(jh+j) = t8 - u11 2380 b(jh+j) = u8 + t11 2381 a(jw+j) = t8 + u11 2382 b(jw+j) = u8 - t11 2383 a(jm+j) = t9 - u10 2384 b(jm+j) = u9 + t10 2385 a(jr+j) = t9 + u10 2386 b(jr+j) = u9 - t10 2387*---------------------- 2388 ajt = a(jt+j) 2389 t1 = aji + ajt 2390 ajs = a(js+j) 2391 t2 = ajn + ajs 2392 t3 = aji - ajt 2393 t4 = ajn - ajs 2394 ajx = a(jx+j) 2395 ajt = ajx 2396 t5 = t1 + t2 2397 t6 = c1 * ( t1 - t2 ) 2398 ajp = a(jp+j) 2399 t7 = ajp - 0.25_dp * t5 2400 ax = ajp + t5 2401 t8 = t7 + t6 2402 t9 = t7 - t6 2403 a(jp+j) = ajd 2404 t10 = c3 * t3 - c2 * t4 2405 t11 = c2 * t3 + c3 * t4 2406 a(jd+j) = ax 2407 bjt = b(jt+j) 2408 u1 = bji + bjt 2409 bjs = b(js+j) 2410 u2 = bjn + bjs 2411 u3 = bji - bjt 2412 u4 = bjn - bjs 2413 bjx = b(jx+j) 2414 bjt = bjx 2415 u5 = u1 + u2 2416 u6 = c1 * ( u1 - u2 ) 2417 bjp = b(jp+j) 2418 u7 = bjp - 0.25_dp * u5 2419 bx = bjp + u5 2420 u8 = u7 + u6 2421 u9 = u7 - u6 2422 b(jp+j) = bjd 2423 u10 = c3 * u3 - c2 * u4 2424 u11 = c2 * u3 + c3 * u4 2425 b(jd+j) = bx 2426 a(ji+j) = t8 - u11 2427 b(ji+j) = u8 + t11 2428 a(jx+j) = t8 + u11 2429 b(jx+j) = u8 - t11 2430 a(jn+j) = t9 - u10 2431 b(jn+j) = u9 + t10 2432 a(js+j) = t9 + u10 2433 b(js+j) = u9 - t10 2434*---------------------- 2435 ajv = a(jv+j) 2436 ajy = a(jy+j) 2437 t1 = ajv + ajy 2438 t2 = ajo + ajt 2439 t3 = ajv - ajy 2440 t4 = ajo - ajt 2441 a(jv+j) = ajj 2442 t5 = t1 + t2 2443 t6 = c1 * ( t1 - t2 ) 2444 aju = a(ju+j) 2445 t7 = aju - 0.25_dp * t5 2446 ax = aju + t5 2447 t8 = t7 + t6 2448 t9 = t7 - t6 2449 a(ju+j) = aje 2450 t10 = c3 * t3 - c2 * t4 2451 t11 = c2 * t3 + c3 * t4 2452 a(je+j) = ax 2453 bjv = b(jv+j) 2454 bjy = b(jy+j) 2455 u1 = bjv + bjy 2456 u2 = bjo + bjt 2457 u3 = bjv - bjy 2458 u4 = bjo - bjt 2459 b(jv+j) = bjj 2460 u5 = u1 + u2 2461 u6 = c1 * ( u1 - u2 ) 2462 bju = b(ju+j) 2463 u7 = bju - 0.25_dp * u5 2464 bx = bju + u5 2465 u8 = u7 + u6 2466 u9 = u7 - u6 2467 b(ju+j) = bje 2468 u10 = c3 * u3 - c2 * u4 2469 u11 = c2 * u3 + c3 * u4 2470 b(je+j) = bx 2471 a(jj+j) = t8 - u11 2472 b(jj+j) = u8 + t11 2473 a(jy+j) = t8 + u11 2474 b(jy+j) = u8 - t11 2475 a(jo+j) = t9 - u10 2476 b(jo+j) = u9 + t10 2477 a(jt+j) = t9 + u10 2478 b(jt+j) = u9 - t10 2479 j = j + jump 2480 410 continue 2481* 2482 else 2483* 2484#ifdef OLD_CRAY 2485cdir$ ivdep, shortloop 2486#endif 2487 do 440 l = 1 , nvex 2488 ajb = a(jb+j) 2489 aje = a(je+j) 2490 t1 = ajb + aje 2491 ajc = a(jc+j) 2492 ajd = a(jd+j) 2493 t2 = ajc + ajd 2494 t3 = ajb - aje 2495 t4 = ajc - ajd 2496 ajf = a(jf+j) 2497 ajb = ajf 2498 t5 = t1 + t2 2499 t6 = c1 * ( t1 - t2 ) 2500 aja = a(ja+j) 2501 t7 = aja - 0.25_dp * t5 2502 a(ja+j) = aja + t5 2503 t8 = t7 + t6 2504 t9 = t7 - t6 2505 ajk = a(jk+j) 2506 ajc = ajk 2507 t10 = c3 * t3 - c2 * t4 2508 t11 = c2 * t3 + c3 * t4 2509 bjb = b(jb+j) 2510 bje = b(je+j) 2511 u1 = bjb + bje 2512 bjc = b(jc+j) 2513 bjd = b(jd+j) 2514 u2 = bjc + bjd 2515 u3 = bjb - bje 2516 u4 = bjc - bjd 2517 bjf = b(jf+j) 2518 bjb = bjf 2519 u5 = u1 + u2 2520 u6 = c1 * ( u1 - u2 ) 2521 bja = b(ja+j) 2522 u7 = bja - 0.25_dp * u5 2523 b(ja+j) = bja + u5 2524 u8 = u7 + u6 2525 u9 = u7 - u6 2526 bjk = b(jk+j) 2527 bjc = bjk 2528 u10 = c3 * u3 - c2 * u4 2529 u11 = c2 * u3 + c3 * u4 2530 a(jf+j) = co1*(t8-u11) - si1*(u8+t11) 2531 b(jf+j) = si1*(t8-u11) + co1*(u8+t11) 2532 aje = co4*(t8+u11) - si4*(u8-t11) 2533 bje = si4*(t8+u11) + co4*(u8-t11) 2534 a(jk+j) = co2*(t9-u10) - si2*(u9+t10) 2535 b(jk+j) = si2*(t9-u10) + co2*(u9+t10) 2536 ajd = co3*(t9+u10) - si3*(u9-t10) 2537 bjd = si3*(t9+u10) + co3*(u9-t10) 2538*---------------------- 2539 ajg = a(jg+j) 2540 ajj = a(jj+j) 2541 t1 = ajg + ajj 2542 ajh = a(jh+j) 2543 aji = a(ji+j) 2544 t2 = ajh + aji 2545 t3 = ajg - ajj 2546 t4 = ajh - aji 2547 ajl = a(jl+j) 2548 ajh = ajl 2549 t5 = t1 + t2 2550 t6 = c1 * ( t1 - t2 ) 2551 t7 = ajb - 0.25_dp * t5 2552 a(jb+j) = ajb + t5 2553 t8 = t7 + t6 2554 t9 = t7 - t6 2555 ajq = a(jq+j) 2556 aji = ajq 2557 t10 = c3 * t3 - c2 * t4 2558 t11 = c2 * t3 + c3 * t4 2559 bjg = b(jg+j) 2560 bjj = b(jj+j) 2561 u1 = bjg + bjj 2562 bjh = b(jh+j) 2563 bji = b(ji+j) 2564 u2 = bjh + bji 2565 u3 = bjg - bjj 2566 u4 = bjh - bji 2567 bjl = b(jl+j) 2568 bjh = bjl 2569 u5 = u1 + u2 2570 u6 = c1 * ( u1 - u2 ) 2571 u7 = bjb - 0.25_dp * u5 2572 b(jb+j) = bjb + u5 2573 u8 = u7 + u6 2574 u9 = u7 - u6 2575 bjq = b(jq+j) 2576 bji = bjq 2577 u10 = c3 * u3 - c2 * u4 2578 u11 = c2 * u3 + c3 * u4 2579 a(jg+j) = co1*(t8-u11) - si1*(u8+t11) 2580 b(jg+j) = si1*(t8-u11) + co1*(u8+t11) 2581 ajj = co4*(t8+u11) - si4*(u8-t11) 2582 bjj = si4*(t8+u11) + co4*(u8-t11) 2583 a(jl+j) = co2*(t9-u10) - si2*(u9+t10) 2584 b(jl+j) = si2*(t9-u10) + co2*(u9+t10) 2585 a(jq+j) = co3*(t9+u10) - si3*(u9-t10) 2586 b(jq+j) = si3*(t9+u10) + co3*(u9-t10) 2587*---------------------- 2588 ajo = a(jo+j) 2589 t1 = ajh + ajo 2590 ajm = a(jm+j) 2591 ajn = a(jn+j) 2592 t2 = ajm + ajn 2593 t3 = ajh - ajo 2594 t4 = ajm - ajn 2595 ajr = a(jr+j) 2596 ajn = ajr 2597 t5 = t1 + t2 2598 t6 = c1 * ( t1 - t2 ) 2599 t7 = ajc - 0.25_dp * t5 2600 a(jc+j) = ajc + t5 2601 t8 = t7 + t6 2602 t9 = t7 - t6 2603 ajw = a(jw+j) 2604 ajo = ajw 2605 t10 = c3 * t3 - c2 * t4 2606 t11 = c2 * t3 + c3 * t4 2607 bjo = b(jo+j) 2608 u1 = bjh + bjo 2609 bjm = b(jm+j) 2610 bjn = b(jn+j) 2611 u2 = bjm + bjn 2612 u3 = bjh - bjo 2613 u4 = bjm - bjn 2614 bjr = b(jr+j) 2615 bjn = bjr 2616 u5 = u1 + u2 2617 u6 = c1 * ( u1 - u2 ) 2618 u7 = bjc - 0.25_dp * u5 2619 b(jc+j) = bjc + u5 2620 u8 = u7 + u6 2621 u9 = u7 - u6 2622 bjw = b(jw+j) 2623 bjo = bjw 2624 u10 = c3 * u3 - c2 * u4 2625 u11 = c2 * u3 + c3 * u4 2626 a(jh+j) = co1*(t8-u11) - si1*(u8+t11) 2627 b(jh+j) = si1*(t8-u11) + co1*(u8+t11) 2628 a(jw+j) = co4*(t8+u11) - si4*(u8-t11) 2629 b(jw+j) = si4*(t8+u11) + co4*(u8-t11) 2630 a(jm+j) = co2*(t9-u10) - si2*(u9+t10) 2631 b(jm+j) = si2*(t9-u10) + co2*(u9+t10) 2632 a(jr+j) = co3*(t9+u10) - si3*(u9-t10) 2633 b(jr+j) = si3*(t9+u10) + co3*(u9-t10) 2634*---------------------- 2635 ajt = a(jt+j) 2636 t1 = aji + ajt 2637 ajs = a(js+j) 2638 t2 = ajn + ajs 2639 t3 = aji - ajt 2640 t4 = ajn - ajs 2641 ajx = a(jx+j) 2642 ajt = ajx 2643 t5 = t1 + t2 2644 t6 = c1 * ( t1 - t2 ) 2645 ajp = a(jp+j) 2646 t7 = ajp - 0.25_dp * t5 2647 ax = ajp + t5 2648 t8 = t7 + t6 2649 t9 = t7 - t6 2650 a(jp+j) = ajd 2651 t10 = c3 * t3 - c2 * t4 2652 t11 = c2 * t3 + c3 * t4 2653 a(jd+j) = ax 2654 bjt = b(jt+j) 2655 u1 = bji + bjt 2656 bjs = b(js+j) 2657 u2 = bjn + bjs 2658 u3 = bji - bjt 2659 u4 = bjn - bjs 2660 bjx = b(jx+j) 2661 bjt = bjx 2662 u5 = u1 + u2 2663 u6 = c1 * ( u1 - u2 ) 2664 bjp = b(jp+j) 2665 u7 = bjp - 0.25_dp * u5 2666 bx = bjp + u5 2667 u8 = u7 + u6 2668 u9 = u7 - u6 2669 b(jp+j) = bjd 2670 u10 = c3 * u3 - c2 * u4 2671 u11 = c2 * u3 + c3 * u4 2672 b(jd+j) = bx 2673 a(ji+j) = co1*(t8-u11) - si1*(u8+t11) 2674 b(ji+j) = si1*(t8-u11) + co1*(u8+t11) 2675 a(jx+j) = co4*(t8+u11) - si4*(u8-t11) 2676 b(jx+j) = si4*(t8+u11) + co4*(u8-t11) 2677 a(jn+j) = co2*(t9-u10) - si2*(u9+t10) 2678 b(jn+j) = si2*(t9-u10) + co2*(u9+t10) 2679 a(js+j) = co3*(t9+u10) - si3*(u9-t10) 2680 b(js+j) = si3*(t9+u10) + co3*(u9-t10) 2681*---------------------- 2682 ajv = a(jv+j) 2683 ajy = a(jy+j) 2684 t1 = ajv + ajy 2685 t2 = ajo + ajt 2686 t3 = ajv - ajy 2687 t4 = ajo - ajt 2688 a(jv+j) = ajj 2689 t5 = t1 + t2 2690 t6 = c1 * ( t1 - t2 ) 2691 aju = a(ju+j) 2692 t7 = aju - 0.25_dp * t5 2693 ax = aju + t5 2694 t8 = t7 + t6 2695 t9 = t7 - t6 2696 a(ju+j) = aje 2697 t10 = c3 * t3 - c2 * t4 2698 t11 = c2 * t3 + c3 * t4 2699 a(je+j) = ax 2700 bjv = b(jv+j) 2701 bjy = b(jy+j) 2702 u1 = bjv + bjy 2703 u2 = bjo + bjt 2704 u3 = bjv - bjy 2705 u4 = bjo - bjt 2706 b(jv+j) = bjj 2707 u5 = u1 + u2 2708 u6 = c1 * ( u1 - u2 ) 2709 bju = b(ju+j) 2710 u7 = bju - 0.25_dp * u5 2711 bx = bju + u5 2712 u8 = u7 + u6 2713 u9 = u7 - u6 2714 b(ju+j) = bje 2715 u10 = c3 * u3 - c2 * u4 2716 u11 = c2 * u3 + c3 * u4 2717 b(je+j) = bx 2718 a(jj+j) = co1*(t8-u11) - si1*(u8+t11) 2719 b(jj+j) = si1*(t8-u11) + co1*(u8+t11) 2720 a(jy+j) = co4*(t8+u11) - si4*(u8-t11) 2721 b(jy+j) = si4*(t8+u11) + co4*(u8-t11) 2722 a(jo+j) = co2*(t9-u10) - si2*(u9+t10) 2723 b(jo+j) = si2*(t9-u10) + co2*(u9+t10) 2724 a(jt+j) = co3*(t9+u10) - si3*(u9-t10) 2725 b(jt+j) = si3*(t9+u10) + co3*(u9-t10) 2726 j = j + jump 2727 440 continue 2728* 2729 endif 2730* 2731*-----(end of loop across transforms) 2732* 2733 ja = ja + jstepx 2734 if (ja.lt.istart) ja = ja + ninc 2735 445 continue 2736 450 continue 2737 460 continue 2738*-----( end of double loop for this k ) 2739 kk = kk + 2*la 2740 470 continue 2741*-----( end of loop over values of k ) 2742 la = 5*la 2743 480 continue 2744*-----( end of loop on type II radix-5 passes ) 2745*-----( nvex transforms completed) 2746 490 continue 2747 istart = istart + nvex * jump 2748 500 continue 2749*-----( end of loop on blocks of transforms ) 2750* 2751 return 2752 end subroutine gpfa5f 2753 2754 end module m_fft_gpfa 2755