1! ---
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt .
6! See Docs/Contributors.txt for a list of contributors.
7! ---
8      module m_fft_gpfa
9!
10!     1d fft routine based on Clive Temperton's GPFA package
11!
12      implicit none
13
14      ! Only double precision support in this module
15      integer, parameter :: dp = selected_real_kind(10,100)
16
17
18  ! This module is not thread safe due to the use of
19  ! these global variables.
20  ! To make it thread-safe, and to have more control over
21  ! the re-use of the auxiliary twiddle factors, you might
22  ! define a derived type to hold them and call the wrapper routines
23  ! with it as one of the arguments.
24
25      real(dp), allocatable, save :: trigs(:)
26      integer, save :: nold = -1
27  !----------------------------------------------------------
28      ! auxiliary variable
29      character(len=20) :: msg
30
31      public :: nfft            ! Generates N compatible with the fft
32
33      public :: fft_gpfa_ez     ! Simple version with complex array
34      public :: fft_gpfa        ! General version with strides,
35                                ! multiple vectors, etc
36
37      ! Wrapper for setgpfa which returns proper length of work array
38      public :: setgpfa_check
39
40      ! Access to the standard legacy routines, if needed
41      public :: gpfa, setgpfa
42
43      private
44
45      CONTAINS
46
47      SUBROUTINE nfft( N )
48
49      integer, intent(inout) :: N
50
51C CHANGES N INTO THE NEXT INTEGER ALLOWED BY THE FFT ROUTINE
52C WRITTEN BY J.M.SOLER. MAY/95.
53! Modified by A. Garcia, May 2015
54
55      integer :: np, nmax, nmin, nrem, ip
56      parameter (NP = 3, NMAX = huge(N))  ! no limits with huge
57      integer :: IPRIME(NP)
58      data IPRIME / 2, 3, 5 /
59
60      NMIN = N
61      DO N = NMIN, NMAX
62        NREM = N
63        DO IP = 1,NP
64   10     CONTINUE
65          IF ( MODULO( NREM, IPRIME(IP) ) .EQ. 0 ) THEN
66            NREM = NREM / IPRIME(IP)
67            GOTO 10
68          ENDIF
69        ENDDO
70        IF (NREM .EQ. 1) RETURN
71      ENDDO
72      write(msg,"(i0)") NMIN
73      call die('NFFT: NO SUITABLE INTEGER FOUND FOR N =' // trim(msg))
74      END SUBROUTINE nfft
75
76      subroutine fft_gpfa_ez(data,n,isign)
77
78      ! packed complex array in a real array twice as long
79
80      integer, intent(in)     :: n
81      real(dp), intent(inout) :: data(2*n)
82      integer, intent(in)     :: isign
83
84!        ISIGN = +1 FOR FORWARD TRANSFORM
85!              = -1 FOR INVERSE TRANSFORM
86
87  ! If n has changed since the last use,
88  ! set trigs array, used by 1D-FFT routine gpfa
89
90  ! The routine setgpfa_alloc is a special version
91  ! of setgpfa which (re)allocates the trigs argument
92  ! if needed, and re-fills it with new twiddle factors
93
94      if (n /= nold) then
95         call setgpfa_alloc( trigs, n )
96      end if
97
98  ! FFT. Arguments of gpfa are:
99  ! gpfa( realData(*), imagData(*), trigs(*),
100  !       dataSpan, vectorSpan, vectorSize, numVectors, fftDir )
101  !
102  ! Note dataSpan=2 since we are packing the real and imaginary
103  ! parts in the same vector.
104
105      call gpfa( data(1), data(2), trigs,
106     $           2, 1, n, 1, isign )
107
108      nold = n
109
110      end subroutine fft_gpfa_ez
111
112      subroutine fft_gpfa(realData, imagData,
113     $                    dataSpan, vectorSpan, N, numVectors,
114     $                    isign )
115
116      ! A wrapper for the GPFA routine which handles the
117      ! work array automatically
118
119      real(dp), intent(inout) :: realData(*), imagData(*)
120      integer, intent(in)     :: dataSpan, vectorSpan, N, numVectors
121      integer, intent(in)     :: isign
122
123!        ISIGN = +1 FOR FORWARD TRANSFORM
124!              = -1 FOR INVERSE TRANSFORM
125
126  ! Set trigs array, used by 1D-FFT routine gpfa
127  ! The routine setgpfa_alloc is a special version
128  ! of setgpfa which (re)allocates the trigs argument
129  ! if needed, and re-fills it with new twiddle factors
130  ! if n has changed since the last use.
131
132      if (n /= nold) then
133         call setgpfa_alloc( trigs, n )
134      end if
135
136      call gpfa(realData, imagData, trigs,
137     $          dataSpan, vectorSpan, N, numVectors, isign)
138
139      nold = n
140
141      end subroutine fft_gpfa
142!------------------------------------------------------------------
143      subroutine  setgpfa_alloc( trigs, n )
144      real(dp), allocatable, intent(inout) :: trigs(:)
145      integer, intent(in)                  :: n
146
147      integer :: ntrigs
148
149      if (.not. allocated(trigs)) then
150         allocate(trigs(100))
151      endif
152      call setgpfa_check(trigs,size(trigs),ntrigs,n)
153
154      if (size(trigs) < ntrigs) then
155         deallocate(trigs)
156         allocate(trigs(ntrigs))
157         call setgpfa_check(trigs,size(trigs),ntrigs,n)
158         if (size(trigs) < ntrigs) STOP "ntrigs error"
159      endif
160
161      end subroutine setgpfa_alloc
162!
163      SUBROUTINE SETGPFA_check(TRIGS,maxtrigs,ntrigs,N)
164*
165      integer, intent(in)  :: maxtrigs  ! The current size of trigs
166      REAL(dp)  TRIGS(maxtrigs)
167      integer, intent(out) :: ntrigs    ! The needed size for trigs
168      integer, intent(in)  :: N
169
170      INTEGER NJ(3)
171      integer       :: nn, ifac, ll, kk, ip, iq, ir,
172     $                 i, ni, irot, kink, k
173      real(dp)      :: angle, twopi, del
174*
175*     DECOMPOSE N INTO FACTORS 2,3,5
176*     ------------------------------
177      NN = N
178      IFAC = 2
179*
180      DO 30 LL = 1 , 3
181      KK = 0
182   10 CONTINUE
183      IF (MOD(NN,IFAC).NE.0) GO TO 20
184      KK = KK + 1
185      NN = NN / IFAC
186      GO TO 10
187   20 CONTINUE
188      NJ(LL) = KK
189      IFAC = IFAC + LL
190   30 CONTINUE
191*
192      IF (NN.NE.1) THEN
193         write(msg,"(i0)") N
194         call die('GPFA: '//trim(msg)//' IS NOT A LEGAL VALUE OF N')
195      ENDIF
196*
197      IP = NJ(1)
198      IQ = NJ(2)
199      IR = NJ(3)
200*
201*     COMPUTE LIST OF ROTATED TWIDDLE FACTORS
202*     ---------------------------------------
203      NJ(1) = 2**IP
204      NJ(2) = 3**IQ
205      NJ(3) = 5**IR
206*
207      TWOPI = 4.0_dp * ASIN(1.0_dp)
208      I = 1
209*
210      DO 60 LL = 1 , 3
211      NI = NJ(LL)
212      IF (NI.EQ.1) GO TO 60
213*
214      DEL = TWOPI / real(NI,kind=dp)
215      IROT = N / NI
216      KINK = MOD(IROT,NI)
217      KK = 0
218*
219      DO 50 K = 1 , NI
220      ANGLE = real(KK,kind=dp) * DEL
221      if (i.lt.maxtrigs) then
222        TRIGS(I) = COS(ANGLE)
223        TRIGS(I+1) = SIN(ANGLE)
224      endif
225      I = I + 2
226      KK = KK + KINK
227      IF (KK.GT.NI) KK = KK - NI
228   50 CONTINUE
229   60 CONTINUE
230      ntrigs = i - 1  ! note boundary. i is even
231*
232      RETURN
233      END SUBROUTINE SETGPFA_check
234
235!--------------------------------------------------------------
236!
237!  What follows is the original code by Temperton
238!  with explicit declarations of variables and converted to 'dp'.
239!
240!  For arbitrary-precision support the parameter constants might
241!  be computed instead of inserted explicitly.
242!
243!  For example: sin60 = sin(2.0_wp*pi/3.0_wp), where wp is the working
244!                       precision, and
245!               pi = 4.0_wp * atan(1.0_wp), or
246!               twopi = 4.0_wp * asin(1.0_wp)
247!
248!*********************************************************************
249!
250!        SUBROUTINE 'SETGPFA'
251!        SETUP ROUTINE FOR SELF-SORTING IN-PLACE
252!            GENERALIZED PRIME FACTOR (COMPLEX) FFT [GPFA]
253!
254!        CALL SETGPFA(TRIGS,N)
255!
256!        INPUT :
257!        -----
258!        N IS THE LENGTH OF THE TRANSFORMS. N MUST BE OF THE FORM:
259!          -----------------------------------
260!            N = (2**IP) * (3**IQ) * (5**IR)
261!          -----------------------------------
262!
263!        OUTPUT:
264!        ------
265!        TRIGS IS A TABLE OF TWIDDLE FACTORS,
266!          OF LENGTH 2*IPQR (REAL) WORDS, WHERE:
267!          --------------------------------------
268!            IPQR = (2**IP) + (3**IQ) + (5**IR)
269!          --------------------------------------
270!
271!        WRITTEN BY CLIVE TEMPERTON 1990
272!
273!*********************************************************************
274!
275      SUBROUTINE SETGPFA(TRIGS,N)
276*
277
278      INTEGER NJ(3)
279      REAL(dp) TRIGS(*)
280
281      integer       :: nn, n, ifac, ll, kk, ip, iq, ir,
282     $                 i, ni, irot, kink, k
283      real(dp)      :: angle, twopi, del
284*
285*     DECOMPOSE N INTO FACTORS 2,3,5
286*     ------------------------------
287      NN = N
288      IFAC = 2
289*
290      DO 30 LL = 1 , 3
291      KK = 0
292   10 CONTINUE
293      IF (MOD(NN,IFAC).NE.0) GO TO 20
294      KK = KK + 1
295      NN = NN / IFAC
296      GO TO 10
297   20 CONTINUE
298      NJ(LL) = KK
299      IFAC = IFAC + LL
300   30 CONTINUE
301*
302      IF (NN.NE.1) THEN
303         write(msg,"(i0)") N
304         call die('GPFA: '//trim(msg)//' IS NOT A LEGAL VALUE OF N')
305      ENDIF
306*
307      IP = NJ(1)
308      IQ = NJ(2)
309      IR = NJ(3)
310*
311*     COMPUTE LIST OF ROTATED TWIDDLE FACTORS
312*     ---------------------------------------
313      NJ(1) = 2**IP
314      NJ(2) = 3**IQ
315      NJ(3) = 5**IR
316*
317      TWOPI = 4.0_dp * ASIN(1.0_dp)
318      I = 1
319*
320      DO 60 LL = 1 , 3
321      NI = NJ(LL)
322      IF (NI.EQ.1) GO TO 60
323*
324      DEL = TWOPI / real(NI,kind=dp)
325      IROT = N / NI
326      KINK = MOD(IROT,NI)
327      KK = 0
328*
329      DO 50 K = 1 , NI
330      ANGLE = real(KK,kind=dp) * DEL
331      TRIGS(I) = COS(ANGLE)
332      TRIGS(I+1) = SIN(ANGLE)
333      I = I + 2
334      KK = KK + KINK
335      IF (KK.GT.NI) KK = KK - NI
336   50 CONTINUE
337   60 CONTINUE
338*
339      RETURN
340      END SUBROUTINE SETGPFA
341
342
343!        SUBROUTINE 'GPFA'
344!        SELF-SORTING IN-PLACE GENERALIZED PRIME FACTOR (COMPLEX) FFT
345!
346!        *** THIS IS THE ALL-FORTRAN VERSION ***
347!            -------------------------------
348!
349!        CALL GPFA(A,B,TRIGS,INC,JUMP,N,LOT,ISIGN)
350!
351!        A IS FIRST REAL INPUT/OUTPUT VECTOR
352!        B IS FIRST IMAGINARY INPUT/OUTPUT VECTOR
353!        TRIGS IS A TABLE OF TWIDDLE FACTORS, PRECALCULATED
354!              BY CALLING SUBROUTINE 'SETGPFA'
355!        INC IS THE INCREMENT WITHIN EACH DATA VECTOR
356!        JUMP IS THE INCREMENT BETWEEN DATA VECTORS
357!        N IS THE LENGTH OF THE TRANSFORMS:
358!          -----------------------------------
359!            N = (2**IP) * (3**IQ) * (5**IR)
360!          -----------------------------------
361!        LOT IS THE NUMBER OF TRANSFORMS
362!        ISIGN = +1 FOR FORWARD TRANSFORM
363!              = -1 FOR INVERSE TRANSFORM
364!
365!        WRITTEN BY CLIVE TEMPERTON
366!        RECHERCHE EN PREVISION NUMERIQUE
367!        ATMOSPHERIC ENVIRONMENT SERVICE, CANADA
368!
369!----------------------------------------------------------------------
370!
371!        DEFINITION OF TRANSFORM
372!        -----------------------
373!
374!        X(J) = SUM(K=0,...,N-1)(C(K)*EXP(ISIGN*2*I*J*K*PI/N))
375!
376!---------------------------------------------------------------------
377!
378!        FOR A MATHEMATICAL DEVELOPMENT OF THE ALGORITHM USED,
379!        SEE:
380!
381!        C TEMPERTON : "A GENERALIZED PRIME FACTOR FFT ALGORITHM
382!          FOR ANY N = (2**P)(3**Q)(5**R)",
383!          SIAM J. SCI. STAT. COMP., MAY 1992.
384!
385
386      SUBROUTINE GPFA(A,B,TRIGS,INC,JUMP,N,LOT,ISIGN)
387
388      IMPLICIT NONE
389*
390      REAL(dp) ::     A(*), B(*)
391      REAL(dp)  TRIGS(*)
392      INTEGER INC, JUMP, N, LOT, ISIGN
393C
394      INTEGER
395     .  NJ(3), NN, IFAC, LL, KK, IP, IQ, IR, I
396*
397*     DECOMPOSE N INTO FACTORS 2,3,5
398*     ------------------------------
399      NN = N
400      IFAC = 2
401*
402      DO 30 LL = 1 , 3
403      KK = 0
404   10 CONTINUE
405      IF (MOD(NN,IFAC).NE.0) GO TO 20
406      KK = KK + 1
407      NN = NN / IFAC
408      GO TO 10
409   20 CONTINUE
410      NJ(LL) = KK
411      IFAC = IFAC + LL
412   30 CONTINUE
413*
414      IF (NN.NE.1) THEN
415         write(msg,"(i0)") N
416         call die('GPFA: '//trim(msg)//' IS NOT A LEGAL VALUE OF N')
417      ENDIF
418*
419      IP = NJ(1)
420      IQ = NJ(2)
421      IR = NJ(3)
422*
423*     COMPUTE THE TRANSFORM
424*     ---------------------
425      I = 1
426      IF (IP.GT.0) THEN
427         CALL GPFA2F(A,B,TRIGS,INC,JUMP,N,IP,LOT,ISIGN)
428         I = I + 2 * ( 2**IP)
429      ENDIF
430      IF (IQ.GT.0) THEN
431         CALL GPFA3F(A,B,TRIGS(I),INC,JUMP,N,IQ,LOT,ISIGN)
432         I = I + 2 * (3**IQ)
433      ENDIF
434      IF (IR.GT.0) THEN
435         CALL GPFA5F(A,B,TRIGS(I),INC,JUMP,N,IR,LOT,ISIGN)
436      ENDIF
437*
438      RETURN
439      END SUBROUTINE GPFA
440
441
442!*********************************************************************
443
444*     fortran version of *gpfa2* -
445*     radix-2 section of self-sorting, in-place, generalized pfa
446*     central radix-2 and radix-8 passes included
447*      so that transform length can be any power of 2
448*
449
450      subroutine gpfa2f(a,b,trigs,inc,jump,n,mm,lot,isign)
451
452      real(dp) ::     a(*), b(*)
453      real(dp)     ::     trigs(*)
454      integer inc, jump, n, mm, lot, isign
455
456      integer  n2, inq, jstepx, ninc, ink, m2, m8,
457     $         m, mh, nblox, left, istart, nb, nvex,
458     $         la, mu, ipass, jstep, jstepl, jjj, ja,
459     $         nu, jb, jc, jd, j, l, kk, k, je, jf,
460     $         jg, jh, laincl, ll, ji, jj, jk, jl, jm,
461     $         jn, jo, jp
462
463      real(dp) :: s, aja, ajc, t0, t2, ajb, ajd,
464     $                t1, t3, bja, bjc, u0, u2, bjb,
465     $                bjd, u1, u3, co1, si1, co2, si2,
466     $                co3, si3, ss, c1, c2, c3, aje,
467     $                ajf, ajg, ajh, bje, bjf, bjg,
468     $                bjh, co4, si4, co5, si5, co6, si6,
469     $                co7, si7, aji, bjm, ajj, bjj, ajk,
470     $                ajl, bji, bjk, ajo, bjl, bjo, ajm,
471     $                ajn, ajp, bjn, bjp
472
473#ifdef OLD_CRAY
474      integer, parameter :: lvr = CRAY_LVR_PARAMETER
475#else
476      integer, parameter :: lvr = 1024
477#endif
478*
479*     ***************************************************************
480*     *                                                             *
481*     *  N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. *
482*     *  RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE *
483*     *  (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER.      *
484*     *                                                             *
485*     ***************************************************************
486*
487      n2 = 2**mm
488      inq = n/n2
489      jstepx = (n2-n) * inc
490      ninc = n * inc
491      ink = inc * inq
492*
493      m2 = 0
494      m8 = 0
495      if (mod(mm,2).eq.0) then
496         m = mm/2
497      else if (mod(mm,4).eq.1) then
498         m = (mm-1)/2
499         m2 = 1
500      else if (mod(mm,4).eq.3) then
501         m = (mm-3)/2
502         m8 = 1
503      endif
504      mh = (m+1)/2
505*
506      nblox = 1 + (lot-1)/lvr
507      left = lot
508      s = real(isign,kind=dp)
509      istart = 1
510*
511*  loop on blocks of lvr transforms
512*  --------------------------------
513      do 500 nb = 1 , nblox
514*
515      if (left.le.lvr) then
516         nvex = left
517      else if (left.lt.(2*lvr)) then
518         nvex = left/2
519         nvex = nvex + mod(nvex,2)
520      else
521         nvex = lvr
522      endif
523      left = left - nvex
524*
525      la = 1
526*
527*  loop on type I radix-4 passes
528*  -----------------------------
529      mu = mod(inq,4)
530      if (isign.eq.-1) mu = 4 - mu
531      ss = 1.0_dp
532      if (mu.eq.3) ss = -1.0_dp
533*
534      if (mh.eq.0) go to 200
535*
536      do 160 ipass = 1 , mh
537      jstep = (n*inc) / (4*la)
538      jstepl = jstep - ninc
539*
540*  k = 0 loop (no twiddle factors)
541*  -------------------------------
542      do 120 jjj = 0 , (n-1)*inc , 4*jstep
543      ja = istart + jjj
544*
545*     "transverse" loop
546*     -----------------
547      do 115 nu = 1 , inq
548      jb = ja + jstepl
549      if (jb.lt.istart) jb = jb + ninc
550      jc = jb + jstepl
551      if (jc.lt.istart) jc = jc + ninc
552      jd = jc + jstepl
553      if (jd.lt.istart) jd = jd + ninc
554      j = 0
555*
556*  loop across transforms
557*  ----------------------
558#ifdef OLD_CRAY
559cdir$ ivdep, shortloop
560#endif
561      do 110 l = 1 , nvex
562      aja = a(ja+j)
563      ajc = a(jc+j)
564      t0 = aja + ajc
565      t2 = aja - ajc
566      ajb = a(jb+j)
567      ajd = a(jd+j)
568      t1 = ajb + ajd
569      t3 = ss * ( ajb - ajd )
570      bja = b(ja+j)
571      bjc = b(jc+j)
572      u0 = bja + bjc
573      u2 = bja - bjc
574      bjb = b(jb+j)
575      bjd = b(jd+j)
576      u1 = bjb + bjd
577      u3 = ss * ( bjb - bjd )
578      a(ja+j) = t0 + t1
579      a(jc+j) = t0 - t1
580      b(ja+j) = u0 + u1
581      b(jc+j) = u0 - u1
582      a(jb+j) = t2 - u3
583      a(jd+j) = t2 + u3
584      b(jb+j) = u2 + t3
585      b(jd+j) = u2 - t3
586      j = j + jump
587  110 continue
588      ja = ja + jstepx
589      if (ja.lt.istart) ja = ja + ninc
590  115 continue
591  120 continue
592*
593*  finished if n2 = 4
594*  ------------------
595      if (n2.eq.4) go to 490
596      kk = 2 * la
597*
598*  loop on nonzero k
599*  -----------------
600      do 150 k = ink , jstep-ink , ink
601      co1 = trigs(kk+1)
602      si1 = s*trigs(kk+2)
603      co2 = trigs(2*kk+1)
604      si2 = s*trigs(2*kk+2)
605      co3 = trigs(3*kk+1)
606      si3 = s*trigs(3*kk+2)
607*
608*  loop along transform
609*  --------------------
610      do 140 jjj = k , (n-1)*inc , 4*jstep
611      ja = istart + jjj
612*
613*     "transverse" loop
614*     -----------------
615      do 135 nu = 1 , inq
616      jb = ja + jstepl
617      if (jb.lt.istart) jb = jb + ninc
618      jc = jb + jstepl
619      if (jc.lt.istart) jc = jc + ninc
620      jd = jc + jstepl
621      if (jd.lt.istart) jd = jd + ninc
622      j = 0
623*
624*  loop across transforms
625*  ----------------------
626#ifdef OLD_CRAY
627cdir$ ivdep, shortloop
628#endif
629      do 130 l = 1 , nvex
630      aja = a(ja+j)
631      ajc = a(jc+j)
632      t0 = aja + ajc
633      t2 = aja - ajc
634      ajb = a(jb+j)
635      ajd = a(jd+j)
636      t1 = ajb + ajd
637      t3 = ss * ( ajb - ajd )
638      bja = b(ja+j)
639      bjc = b(jc+j)
640      u0 = bja + bjc
641      u2 = bja - bjc
642      bjb = b(jb+j)
643      bjd = b(jd+j)
644      u1 = bjb + bjd
645      u3 = ss * ( bjb - bjd )
646      a(ja+j) = t0 + t1
647      b(ja+j) = u0 + u1
648      a(jb+j) = co1*(t2-u3) - si1*(u2+t3)
649      b(jb+j) = si1*(t2-u3) + co1*(u2+t3)
650      a(jc+j) = co2*(t0-t1) - si2*(u0-u1)
651      b(jc+j) = si2*(t0-t1) + co2*(u0-u1)
652      a(jd+j) = co3*(t2+u3) - si3*(u2-t3)
653      b(jd+j) = si3*(t2+u3) + co3*(u2-t3)
654      j = j + jump
655  130 continue
656*-----( end of loop across transforms )
657      ja = ja + jstepx
658      if (ja.lt.istart) ja = ja + ninc
659  135 continue
660  140 continue
661*-----( end of loop along transforms )
662      kk = kk + 2*la
663  150 continue
664*-----( end of loop on nonzero k )
665      la = 4*la
666  160 continue
667*-----( end of loop on type I radix-4 passes)
668*
669*  central radix-2 pass
670*  --------------------
671  200 continue
672      if (m2.eq.0) go to 300
673*
674      jstep = (n*inc) / (2*la)
675      jstepl = jstep - ninc
676*
677*  k=0 loop (no twiddle factors)
678*  -----------------------------
679      do 220 jjj = 0 , (n-1)*inc , 2*jstep
680      ja = istart + jjj
681*
682*     "transverse" loop
683*     -----------------
684      do 215 nu = 1 , inq
685      jb = ja + jstepl
686      if (jb.lt.istart) jb = jb + ninc
687      j = 0
688*
689*  loop across transforms
690*  ----------------------
691#ifdef OLD_CRAY
692cdir$ ivdep, shortloop
693#endif
694      do 210 l = 1 , nvex
695      aja = a(ja+j)
696      ajb = a(jb+j)
697      t0 = aja - ajb
698      a(ja+j) = aja + ajb
699      a(jb+j) = t0
700      bja = b(ja+j)
701      bjb = b(jb+j)
702      u0 = bja - bjb
703      b(ja+j) = bja + bjb
704      b(jb+j) = u0
705      j = j + jump
706  210 continue
707*-----(end of loop across transforms)
708      ja = ja + jstepx
709      if (ja.lt.istart) ja = ja + ninc
710  215 continue
711  220 continue
712*
713*  finished if n2=2
714*  ----------------
715      if (n2.eq.2) go to 490
716*
717      kk = 2 * la
718*
719*  loop on nonzero k
720*  -----------------
721      do 260 k = ink , jstep - ink , ink
722      co1 = trigs(kk+1)
723      si1 = s*trigs(kk+2)
724*
725*  loop along transforms
726*  ---------------------
727      do 250 jjj = k , (n-1)*inc , 2*jstep
728      ja = istart + jjj
729*
730*     "transverse" loop
731*     -----------------
732      do 245 nu = 1 , inq
733      jb = ja + jstepl
734      if (jb.lt.istart) jb = jb + ninc
735      j = 0
736*
737*  loop across transforms
738*  ----------------------
739      if (kk.eq.n2/2) then
740#ifdef OLD_CRAY
741cdir$ ivdep, shortloop
742#endif
743      do 230 l = 1 , nvex
744      aja = a(ja+j)
745      ajb = a(jb+j)
746      t0 = ss * ( aja - ajb )
747      a(ja+j) = aja + ajb
748      bjb = b(jb+j)
749      bja = b(ja+j)
750      a(jb+j) = ss * ( bjb - bja )
751      b(ja+j) = bja + bjb
752      b(jb+j) = t0
753      j = j + jump
754  230 continue
755*
756      else
757*
758#ifdef OLD_CRAY
759cdir$ ivdep, shortloop
760#endif
761      do 240 l = 1 , nvex
762      aja = a(ja+j)
763      ajb = a(jb+j)
764      t0 = aja - ajb
765      a(ja+j) = aja + ajb
766      bja = b(ja+j)
767      bjb = b(jb+j)
768      u0 = bja - bjb
769      b(ja+j) = bja + bjb
770      a(jb+j) = co1*t0 - si1*u0
771      b(jb+j) = si1*t0 + co1*u0
772      j = j + jump
773  240 continue
774*
775      endif
776*
777*-----(end of loop across transforms)
778      ja = ja + jstepx
779      if (ja.lt.istart) ja = ja + ninc
780  245 continue
781  250 continue
782*-----(end of loop along transforms)
783      kk = kk + 2 * la
784  260 continue
785*-----(end of loop on nonzero k)
786*-----(end of radix-2 pass)
787*
788      la = 2 * la
789      go to 400
790*
791*  central radix-8 pass
792*  --------------------
793  300 continue
794      if (m8.eq.0) go to 400
795      jstep = (n*inc) / (8*la)
796      jstepl = jstep - ninc
797      mu = mod(inq,8)
798      if (isign.eq.-1) mu = 8 - mu
799      c1 = 1.0_dp
800      if (mu.eq.3.or.mu.eq.7) c1 = -1.0_dp
801      c2 = sqrt(0.5_dp)
802      if (mu.eq.3.or.mu.eq.5) c2 = -c2
803      c3 = c1 * c2
804*
805*  stage 1
806*  -------
807      do 320 k = 0 , jstep - ink , ink
808      do 315 jjj = k , (n-1)*inc , 8*jstep
809      ja = istart + jjj
810*
811*     "transverse" loop
812*     -----------------
813      do 312 nu = 1 , inq
814      jb = ja + jstepl
815      if (jb.lt.istart) jb = jb + ninc
816      jc = jb + jstepl
817      if (jc.lt.istart) jc = jc + ninc
818      jd = jc + jstepl
819      if (jd.lt.istart) jd = jd + ninc
820      je = jd + jstepl
821      if (je.lt.istart) je = je + ninc
822      jf = je + jstepl
823      if (jf.lt.istart) jf = jf + ninc
824      jg = jf + jstepl
825      if (jg.lt.istart) jg = jg + ninc
826      jh = jg + jstepl
827      if (jh.lt.istart) jh = jh + ninc
828      j = 0
829#ifdef OLD_CRAY
830cdir$ ivdep, shortloop
831#endif
832      do 310 l = 1 , nvex
833      aja = a(ja+j)
834      aje = a(je+j)
835      t0 = aja - aje
836      a(ja+j) = aja + aje
837      ajc = a(jc+j)
838      ajg = a(jg+j)
839      t1 = c1 * ( ajc - ajg )
840      a(je+j) = ajc + ajg
841      ajb = a(jb+j)
842      ajf = a(jf+j)
843      t2 = ajb - ajf
844      a(jc+j) = ajb + ajf
845      ajd = a(jd+j)
846      ajh = a(jh+j)
847      t3 = ajd - ajh
848      a(jg+j) = ajd + ajh
849      a(jb+j) = t0
850      a(jf+j) = t1
851      a(jd+j) = c2 * ( t2 - t3 )
852      a(jh+j) = c3 * ( t2 + t3 )
853      bja = b(ja+j)
854      bje = b(je+j)
855      u0 = bja - bje
856      b(ja+j) = bja + bje
857      bjc = b(jc+j)
858      bjg = b(jg+j)
859      u1 = c1 * ( bjc - bjg )
860      b(je+j) = bjc + bjg
861      bjb = b(jb+j)
862      bjf = b(jf+j)
863      u2 = bjb - bjf
864      b(jc+j) = bjb + bjf
865      bjd = b(jd+j)
866      bjh = b(jh+j)
867      u3 = bjd - bjh
868      b(jg+j) = bjd + bjh
869      b(jb+j) = u0
870      b(jf+j) = u1
871      b(jd+j) = c2 * ( u2 - u3 )
872      b(jh+j) = c3 * ( u2 + u3 )
873      j = j + jump
874  310 continue
875      ja = ja + jstepx
876      if (ja.lt.istart) ja = ja + ninc
877  312 continue
878  315 continue
879  320 continue
880*
881*  stage 2
882*  -------
883*
884*  k=0 (no twiddle factors)
885*  ------------------------
886      do 330 jjj = 0 , (n-1)*inc , 8*jstep
887      ja = istart + jjj
888*
889*     "transverse" loop
890*     -----------------
891      do 328 nu = 1 , inq
892      jb = ja + jstepl
893      if (jb.lt.istart) jb = jb + ninc
894      jc = jb + jstepl
895      if (jc.lt.istart) jc = jc + ninc
896      jd = jc + jstepl
897      if (jd.lt.istart) jd = jd + ninc
898      je = jd + jstepl
899      if (je.lt.istart) je = je + ninc
900      jf = je + jstepl
901      if (jf.lt.istart) jf = jf + ninc
902      jg = jf + jstepl
903      if (jg.lt.istart) jg = jg + ninc
904      jh = jg + jstepl
905      if (jh.lt.istart) jh = jh + ninc
906      j = 0
907#ifdef OLD_CRAY
908cdir$ ivdep, shortloop
909#endif
910      do 325 l = 1 , nvex
911      aja = a(ja+j)
912      aje = a(je+j)
913      t0 = aja + aje
914      t2 = aja - aje
915      ajc = a(jc+j)
916      ajg = a(jg+j)
917      t1 = ajc + ajg
918      t3 = c1 * ( ajc - ajg )
919      bja = b(ja+j)
920      bje = b(je+j)
921      u0 = bja + bje
922      u2 = bja - bje
923      bjc = b(jc+j)
924      bjg = b(jg+j)
925      u1 = bjc + bjg
926      u3 = c1 * ( bjc - bjg )
927      a(ja+j) = t0 + t1
928      a(je+j) = t0 - t1
929      b(ja+j) = u0 + u1
930      b(je+j) = u0 - u1
931      a(jc+j) = t2 - u3
932      a(jg+j) = t2 + u3
933      b(jc+j) = u2 + t3
934      b(jg+j) = u2 - t3
935      ajb = a(jb+j)
936      ajd = a(jd+j)
937      t0 = ajb + ajd
938      t2 = ajb - ajd
939      ajf = a(jf+j)
940      ajh = a(jh+j)
941      t1 = ajf - ajh
942      t3 = ajf + ajh
943      bjb = b(jb+j)
944      bjd = b(jd+j)
945      u0 = bjb + bjd
946      u2 = bjb - bjd
947      bjf = b(jf+j)
948      bjh = b(jh+j)
949      u1 = bjf - bjh
950      u3 = bjf + bjh
951      a(jb+j) = t0 - u3
952      a(jh+j) = t0 + u3
953      b(jb+j) = u0 + t3
954      b(jh+j) = u0 - t3
955      a(jd+j) = t2 + u1
956      a(jf+j) = t2 - u1
957      b(jd+j) = u2 - t1
958      b(jf+j) = u2 + t1
959      j = j + jump
960  325 continue
961      ja = ja + jstepx
962      if (ja.lt.istart) ja = ja + ninc
963  328 continue
964  330 continue
965*
966      if (n2.eq.8) go to 490
967*
968*  loop on nonzero k
969*  -----------------
970      kk = 2 * la
971*
972      do 350 k = ink , jstep - ink , ink
973*
974      co1 = trigs(kk+1)
975      si1 = s * trigs(kk+2)
976      co2 = trigs(2*kk+1)
977      si2 = s * trigs(2*kk+2)
978      co3 = trigs(3*kk+1)
979      si3 = s * trigs(3*kk+2)
980      co4 = trigs(4*kk+1)
981      si4 = s * trigs(4*kk+2)
982      co5 = trigs(5*kk+1)
983      si5 = s * trigs(5*kk+2)
984      co6 = trigs(6*kk+1)
985      si6 = s * trigs(6*kk+2)
986      co7 = trigs(7*kk+1)
987      si7 = s * trigs(7*kk+2)
988*
989      do 345 jjj = k , (n-1)*inc , 8*jstep
990      ja = istart + jjj
991*
992*     "transverse" loop
993*     -----------------
994      do 342 nu = 1 , inq
995      jb = ja + jstepl
996      if (jb.lt.istart) jb = jb + ninc
997      jc = jb + jstepl
998      if (jc.lt.istart) jc = jc + ninc
999      jd = jc + jstepl
1000      if (jd.lt.istart) jd = jd + ninc
1001      je = jd + jstepl
1002      if (je.lt.istart) je = je + ninc
1003      jf = je + jstepl
1004      if (jf.lt.istart) jf = jf + ninc
1005      jg = jf + jstepl
1006      if (jg.lt.istart) jg = jg + ninc
1007      jh = jg + jstepl
1008      if (jh.lt.istart) jh = jh + ninc
1009      j = 0
1010#ifdef OLD_CRAY
1011cdir$ ivdep, shortloop
1012#endif
1013      do 340 l = 1 , nvex
1014      aja = a(ja+j)
1015      aje = a(je+j)
1016      t0 = aja + aje
1017      t2 = aja - aje
1018      ajc = a(jc+j)
1019      ajg = a(jg+j)
1020      t1 = ajc + ajg
1021      t3 = c1 * ( ajc - ajg )
1022      bja = b(ja+j)
1023      bje = b(je+j)
1024      u0 = bja + bje
1025      u2 = bja - bje
1026      bjc = b(jc+j)
1027      bjg = b(jg+j)
1028      u1 = bjc + bjg
1029      u3 = c1 * ( bjc - bjg )
1030      a(ja+j) = t0 + t1
1031      b(ja+j) = u0 + u1
1032      a(je+j) = co4*(t0-t1) - si4*(u0-u1)
1033      b(je+j) = si4*(t0-t1) + co4*(u0-u1)
1034      a(jc+j) = co2*(t2-u3) - si2*(u2+t3)
1035      b(jc+j) = si2*(t2-u3) + co2*(u2+t3)
1036      a(jg+j) = co6*(t2+u3) - si6*(u2-t3)
1037      b(jg+j) = si6*(t2+u3) + co6*(u2-t3)
1038      ajb = a(jb+j)
1039      ajd = a(jd+j)
1040      t0 = ajb + ajd
1041      t2 = ajb - ajd
1042      ajf = a(jf+j)
1043      ajh = a(jh+j)
1044      t1 = ajf - ajh
1045      t3 = ajf + ajh
1046      bjb = b(jb+j)
1047      bjd = b(jd+j)
1048      u0 = bjb + bjd
1049      u2 = bjb - bjd
1050      bjf = b(jf+j)
1051      bjh = b(jh+j)
1052      u1 = bjf - bjh
1053      u3 = bjf + bjh
1054      a(jb+j) = co1*(t0-u3) - si1*(u0+t3)
1055      b(jb+j) = si1*(t0-u3) + co1*(u0+t3)
1056      a(jh+j) = co7*(t0+u3) - si7*(u0-t3)
1057      b(jh+j) = si7*(t0+u3) + co7*(u0-t3)
1058      a(jd+j) = co3*(t2+u1) - si3*(u2-t1)
1059      b(jd+j) = si3*(t2+u1) + co3*(u2-t1)
1060      a(jf+j) = co5*(t2-u1) - si5*(u2+t1)
1061      b(jf+j) = si5*(t2-u1) + co5*(u2+t1)
1062      j = j + jump
1063  340 continue
1064      ja = ja + jstepx
1065      if (ja.lt.istart) ja = ja + ninc
1066  342 continue
1067  345 continue
1068      kk = kk + 2 * la
1069  350 continue
1070*
1071      la = 8 * la
1072*
1073*  loop on type II radix-4 passes
1074*  ------------------------------
1075  400 continue
1076      mu = mod(inq,4)
1077      if (isign.eq.-1) mu = 4 - mu
1078      ss = 1.0_dp
1079      if (mu.eq.3) ss = -1.0_dp
1080*
1081      do 480 ipass = mh+1 , m
1082      jstep = (n*inc) / (4*la)
1083      jstepl = jstep - ninc
1084      laincl = la * ink - ninc
1085*
1086*  k=0 loop (no twiddle factors)
1087*  -----------------------------
1088      do 430 ll = 0 , (la-1)*ink , 4*jstep
1089*
1090      do 420 jjj = ll , (n-1)*inc , 4*la*ink
1091      ja = istart + jjj
1092*
1093*     "transverse" loop
1094*     -----------------
1095      do 415 nu = 1 , inq
1096      jb = ja + jstepl
1097      if (jb.lt.istart) jb = jb + ninc
1098      jc = jb + jstepl
1099      if (jc.lt.istart) jc = jc + ninc
1100      jd = jc + jstepl
1101      if (jd.lt.istart) jd = jd + ninc
1102      je = ja + laincl
1103      if (je.lt.istart) je = je + ninc
1104      jf = je + jstepl
1105      if (jf.lt.istart) jf = jf + ninc
1106      jg = jf + jstepl
1107      if (jg.lt.istart) jg = jg + ninc
1108      jh = jg + jstepl
1109      if (jh.lt.istart) jh = jh + ninc
1110      ji = je + laincl
1111      if (ji.lt.istart) ji = ji + ninc
1112      jj = ji + jstepl
1113      if (jj.lt.istart) jj = jj + ninc
1114      jk = jj + jstepl
1115      if (jk.lt.istart) jk = jk + ninc
1116      jl = jk + jstepl
1117      if (jl.lt.istart) jl = jl + ninc
1118      jm = ji + laincl
1119      if (jm.lt.istart) jm = jm + ninc
1120      jn = jm + jstepl
1121      if (jn.lt.istart) jn = jn + ninc
1122      jo = jn + jstepl
1123      if (jo.lt.istart) jo = jo + ninc
1124      jp = jo + jstepl
1125      if (jp.lt.istart) jp = jp + ninc
1126      j = 0
1127*
1128*  loop across transforms
1129*  ----------------------
1130#ifdef OLD_CRAY
1131cdir$ ivdep, shortloop
1132#endif
1133      do 410 l = 1 , nvex
1134      aja = a(ja+j)
1135      ajc = a(jc+j)
1136      t0 = aja + ajc
1137      t2 = aja - ajc
1138      ajb = a(jb+j)
1139      ajd = a(jd+j)
1140      t1 = ajb + ajd
1141      t3 = ss * ( ajb - ajd )
1142      aji = a(ji+j)
1143      ajc =  aji
1144      bja = b(ja+j)
1145      bjc = b(jc+j)
1146      u0 = bja + bjc
1147      u2 = bja - bjc
1148      bjb = b(jb+j)
1149      bjd = b(jd+j)
1150      u1 = bjb + bjd
1151      u3 = ss * ( bjb - bjd )
1152      aje = a(je+j)
1153      ajb =  aje
1154      a(ja+j) = t0 + t1
1155      a(ji+j) = t0 - t1
1156      b(ja+j) = u0 + u1
1157      bjc =  u0 - u1
1158      bjm = b(jm+j)
1159      bjd =  bjm
1160      a(je+j) = t2 - u3
1161      ajd =  t2 + u3
1162      bjb =  u2 + t3
1163      b(jm+j) = u2 - t3
1164*----------------------
1165      ajg = a(jg+j)
1166      t0 = ajb + ajg
1167      t2 = ajb - ajg
1168      ajf = a(jf+j)
1169      ajh = a(jh+j)
1170      t1 = ajf + ajh
1171      t3 = ss * ( ajf - ajh )
1172      ajj = a(jj+j)
1173      ajg =  ajj
1174      bje = b(je+j)
1175      bjg = b(jg+j)
1176      u0 = bje + bjg
1177      u2 = bje - bjg
1178      bjf = b(jf+j)
1179      bjh = b(jh+j)
1180      u1 = bjf + bjh
1181      u3 = ss * ( bjf - bjh )
1182      b(je+j) = bjb
1183      a(jb+j) = t0 + t1
1184      a(jj+j) = t0 - t1
1185      bjj = b(jj+j)
1186      bjg =  bjj
1187      b(jb+j) = u0 + u1
1188      b(jj+j) = u0 - u1
1189      a(jf+j) = t2 - u3
1190      ajh =  t2 + u3
1191      b(jf+j) = u2 + t3
1192      bjh =  u2 - t3
1193*----------------------
1194      ajk = a(jk+j)
1195      t0 = ajc + ajk
1196      t2 = ajc - ajk
1197      ajl = a(jl+j)
1198      t1 = ajg + ajl
1199      t3 = ss * ( ajg - ajl )
1200      bji = b(ji+j)
1201      bjk = b(jk+j)
1202      u0 = bji + bjk
1203      u2 = bji - bjk
1204      ajo = a(jo+j)
1205      ajl =  ajo
1206      bjl = b(jl+j)
1207      u1 = bjg + bjl
1208      u3 = ss * ( bjg - bjl )
1209      b(ji+j) = bjc
1210      a(jc+j) = t0 + t1
1211      a(jk+j) = t0 - t1
1212      bjo = b(jo+j)
1213      bjl =  bjo
1214      b(jc+j) = u0 + u1
1215      b(jk+j) = u0 - u1
1216      a(jg+j) = t2 - u3
1217      a(jo+j) = t2 + u3
1218      b(jg+j) = u2 + t3
1219      b(jo+j) = u2 - t3
1220*----------------------
1221      ajm = a(jm+j)
1222      t0 = ajm + ajl
1223      t2 = ajm - ajl
1224      ajn = a(jn+j)
1225      ajp = a(jp+j)
1226      t1 = ajn + ajp
1227      t3 = ss * ( ajn - ajp )
1228      a(jm+j) = ajd
1229      u0 = bjd + bjl
1230      u2 = bjd - bjl
1231      bjn = b(jn+j)
1232      bjp = b(jp+j)
1233      u1 = bjn + bjp
1234      u3 = ss * ( bjn - bjp )
1235      a(jn+j) = ajh
1236      a(jd+j) = t0 + t1
1237      a(jl+j) = t0 - t1
1238      b(jd+j) = u0 + u1
1239      b(jl+j) = u0 - u1
1240      b(jn+j) = bjh
1241      a(jh+j) = t2 - u3
1242      a(jp+j) = t2 + u3
1243      b(jh+j) = u2 + t3
1244      b(jp+j) = u2 - t3
1245      j = j + jump
1246  410 continue
1247*-----( end of loop across transforms )
1248      ja = ja + jstepx
1249      if (ja.lt.istart) ja = ja + ninc
1250  415 continue
1251  420 continue
1252  430 continue
1253*-----( end of double loop for k=0 )
1254*
1255*  finished if last pass
1256*  ---------------------
1257      if (ipass.eq.m) go to 490
1258*
1259      kk = 2*la
1260*
1261*     loop on nonzero k
1262*     -----------------
1263      do 470 k = ink , jstep-ink , ink
1264      co1 = trigs(kk+1)
1265      si1 = s*trigs(kk+2)
1266      co2 = trigs(2*kk+1)
1267      si2 = s*trigs(2*kk+2)
1268      co3 = trigs(3*kk+1)
1269      si3 = s*trigs(3*kk+2)
1270*
1271*  double loop along first transform in block
1272*  ------------------------------------------
1273      do 460 ll = k , (la-1)*ink , 4*jstep
1274*
1275      do 450 jjj = ll , (n-1)*inc , 4*la*ink
1276      ja = istart + jjj
1277*
1278*     "transverse" loop
1279*     -----------------
1280      do 445 nu = 1 , inq
1281      jb = ja + jstepl
1282      if (jb.lt.istart) jb = jb + ninc
1283      jc = jb + jstepl
1284      if (jc.lt.istart) jc = jc + ninc
1285      jd = jc + jstepl
1286      if (jd.lt.istart) jd = jd + ninc
1287      je = ja + laincl
1288      if (je.lt.istart) je = je + ninc
1289      jf = je + jstepl
1290      if (jf.lt.istart) jf = jf + ninc
1291      jg = jf + jstepl
1292      if (jg.lt.istart) jg = jg + ninc
1293      jh = jg + jstepl
1294      if (jh.lt.istart) jh = jh + ninc
1295      ji = je + laincl
1296      if (ji.lt.istart) ji = ji + ninc
1297      jj = ji + jstepl
1298      if (jj.lt.istart) jj = jj + ninc
1299      jk = jj + jstepl
1300      if (jk.lt.istart) jk = jk + ninc
1301      jl = jk + jstepl
1302      if (jl.lt.istart) jl = jl + ninc
1303      jm = ji + laincl
1304      if (jm.lt.istart) jm = jm + ninc
1305      jn = jm + jstepl
1306      if (jn.lt.istart) jn = jn + ninc
1307      jo = jn + jstepl
1308      if (jo.lt.istart) jo = jo + ninc
1309      jp = jo + jstepl
1310      if (jp.lt.istart) jp = jp + ninc
1311      j = 0
1312*
1313*  loop across transforms
1314*  ----------------------
1315#ifdef OLD_CRAY
1316cdir$ ivdep, shortloop
1317#endif
1318      do 440 l = 1 , nvex
1319      aja = a(ja+j)
1320      ajc = a(jc+j)
1321      t0 = aja + ajc
1322      t2 = aja - ajc
1323      ajb = a(jb+j)
1324      ajd = a(jd+j)
1325      t1 = ajb + ajd
1326      t3 = ss * ( ajb - ajd )
1327      aji = a(ji+j)
1328      ajc =  aji
1329      bja = b(ja+j)
1330      bjc = b(jc+j)
1331      u0 = bja + bjc
1332      u2 = bja - bjc
1333      bjb = b(jb+j)
1334      bjd = b(jd+j)
1335      u1 = bjb + bjd
1336      u3 = ss * ( bjb - bjd )
1337      aje = a(je+j)
1338      ajb =  aje
1339      a(ja+j) = t0 + t1
1340      b(ja+j) = u0 + u1
1341      a(je+j) = co1*(t2-u3) - si1*(u2+t3)
1342      bjb =  si1*(t2-u3) + co1*(u2+t3)
1343      bjm = b(jm+j)
1344      bjd =  bjm
1345      a(ji+j) = co2*(t0-t1) - si2*(u0-u1)
1346      bjc =  si2*(t0-t1) + co2*(u0-u1)
1347      ajd =  co3*(t2+u3) - si3*(u2-t3)
1348      b(jm+j) = si3*(t2+u3) + co3*(u2-t3)
1349*----------------------------------------
1350      ajg = a(jg+j)
1351      t0 = ajb + ajg
1352      t2 = ajb - ajg
1353      ajf = a(jf+j)
1354      ajh = a(jh+j)
1355      t1 = ajf + ajh
1356      t3 = ss * ( ajf - ajh )
1357      ajj = a(jj+j)
1358      ajg =  ajj
1359      bje = b(je+j)
1360      bjg = b(jg+j)
1361      u0 = bje + bjg
1362      u2 = bje - bjg
1363      bjf = b(jf+j)
1364      bjh = b(jh+j)
1365      u1 = bjf + bjh
1366      u3 = ss * ( bjf - bjh )
1367      b(je+j) = bjb
1368      a(jb+j) = t0 + t1
1369      b(jb+j) = u0 + u1
1370      bjj = b(jj+j)
1371      bjg =  bjj
1372      a(jf+j) = co1*(t2-u3) - si1*(u2+t3)
1373      b(jf+j) = si1*(t2-u3) + co1*(u2+t3)
1374      a(jj+j) = co2*(t0-t1) - si2*(u0-u1)
1375      b(jj+j) = si2*(t0-t1) + co2*(u0-u1)
1376      ajh =  co3*(t2+u3) - si3*(u2-t3)
1377      bjh =  si3*(t2+u3) + co3*(u2-t3)
1378*----------------------------------------
1379      ajk = a(jk+j)
1380      t0 = ajc + ajk
1381      t2 = ajc - ajk
1382      ajl = a(jl+j)
1383      t1 = ajg + ajl
1384      t3 = ss * ( ajg - ajl )
1385      bji = b(ji+j)
1386      bjk = b(jk+j)
1387      u0 = bji + bjk
1388      u2 = bji - bjk
1389      ajo = a(jo+j)
1390      ajl =  ajo
1391      bjl = b(jl+j)
1392      u1 = bjg + bjl
1393      u3 = ss * ( bjg - bjl )
1394      b(ji+j) = bjc
1395      a(jc+j) = t0 + t1
1396      b(jc+j) = u0 + u1
1397      bjo = b(jo+j)
1398      bjl =  bjo
1399      a(jg+j) = co1*(t2-u3) - si1*(u2+t3)
1400      b(jg+j) = si1*(t2-u3) + co1*(u2+t3)
1401      a(jk+j) = co2*(t0-t1) - si2*(u0-u1)
1402      b(jk+j) = si2*(t0-t1) + co2*(u0-u1)
1403      a(jo+j) = co3*(t2+u3) - si3*(u2-t3)
1404      b(jo+j) = si3*(t2+u3) + co3*(u2-t3)
1405*----------------------------------------
1406      ajm = a(jm+j)
1407      t0 = ajm + ajl
1408      t2 = ajm - ajl
1409      ajn = a(jn+j)
1410      ajp = a(jp+j)
1411      t1 = ajn + ajp
1412      t3 = ss * ( ajn - ajp )
1413      a(jm+j) = ajd
1414      u0 = bjd + bjl
1415      u2 = bjd - bjl
1416      a(jn+j) = ajh
1417      bjn = b(jn+j)
1418      bjp = b(jp+j)
1419      u1 = bjn + bjp
1420      u3 = ss * ( bjn - bjp )
1421      b(jn+j) = bjh
1422      a(jd+j) = t0 + t1
1423      b(jd+j) = u0 + u1
1424      a(jh+j) = co1*(t2-u3) - si1*(u2+t3)
1425      b(jh+j) = si1*(t2-u3) + co1*(u2+t3)
1426      a(jl+j) = co2*(t0-t1) - si2*(u0-u1)
1427      b(jl+j) = si2*(t0-t1) + co2*(u0-u1)
1428      a(jp+j) = co3*(t2+u3) - si3*(u2-t3)
1429      b(jp+j) = si3*(t2+u3) + co3*(u2-t3)
1430      j = j + jump
1431  440 continue
1432*-----(end of loop across transforms)
1433      ja = ja + jstepx
1434      if (ja.lt.istart) ja = ja + ninc
1435  445 continue
1436  450 continue
1437  460 continue
1438*-----( end of double loop for this k )
1439      kk = kk + 2*la
1440  470 continue
1441*-----( end of loop over values of k )
1442      la = 4*la
1443  480 continue
1444*-----( end of loop on type II radix-4 passes )
1445*-----( nvex transforms completed)
1446  490 continue
1447      istart = istart + nvex * jump
1448  500 continue
1449*-----( end of loop on blocks of transforms )
1450*
1451      return
1452      end subroutine gpfa2f
1453
1454!******************************************************************
1455
1456*     fortran version of *gpfa3* -
1457*     radix-3 section of self-sorting, in-place
1458*        generalized PFA
1459*
1460*-------------------------------------------------------------------
1461*
1462      subroutine gpfa3f(a,b,trigs,inc,jump,n,mm,lot,isign)
1463
1464      real(dp) ::     a(*), b(*)
1465      real(dp)     ::     trigs(*)
1466      integer inc, jump, n, mm, lot, isign
1467
1468      real(dp), parameter :: sin60 = 0.866025403784437_dp
1469
1470      integer      :: je, jf, jg, jh, laincl, ll, ji
1471      integer      :: n3, inq, jstepx, ninc, ink, mu, m, mh,
1472     $                nblox, left, istart, nb, nvex, la,
1473     $                ipass, jstep, jstepl, jjj, ja, nu, jb,
1474     $                jc, j, l, kk, k, jd
1475
1476      real(dp) :: s, ajb, ajc, t1, aja, t2, t3, bjb, bjc,
1477     $                u1, bja, u2, u3, co1, si1, co2, si2, ajd,
1478     $                bjd
1479      real(dp) :: c1, aje, ajg, ajf, ajh, bje,
1480     $                bjf, bjg, bjh, aji, bji
1481
1482
1483      integer, parameter :: lvr = 128
1484*
1485*     ***************************************************************
1486*     *                                                             *
1487*     *  N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. *
1488*     *  RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE *
1489*     *  (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER.      *
1490*     *                                                             *
1491*     ***************************************************************
1492*
1493      n3 = 3**mm
1494      inq = n/n3
1495      jstepx = (n3-n) * inc
1496      ninc = n * inc
1497      ink = inc * inq
1498      mu = mod(inq,3)
1499      if (isign.eq.-1) mu = 3-mu
1500      m = mm
1501      mh = (m+1)/2
1502      s = real(isign,kind=dp)
1503      c1 = sin60
1504      if (mu.eq.2) c1 = -c1
1505*
1506      nblox = 1 + (lot-1)/lvr
1507      left = lot
1508      s = real(isign,kind=dp)
1509      istart = 1
1510*
1511*  loop on blocks of lvr transforms
1512*  --------------------------------
1513      do 500 nb = 1 , nblox
1514*
1515      if (left.le.lvr) then
1516         nvex = left
1517      else if (left.lt.(2*lvr)) then
1518         nvex = left/2
1519         nvex = nvex + mod(nvex,2)
1520      else
1521         nvex = lvr
1522      endif
1523      left = left - nvex
1524*
1525      la = 1
1526*
1527*  loop on type I radix-3 passes
1528*  -----------------------------
1529      do 160 ipass = 1 , mh
1530      jstep = (n*inc) / (3*la)
1531      jstepl = jstep - ninc
1532*
1533*  k = 0 loop (no twiddle factors)
1534*  -------------------------------
1535      do 120 jjj = 0 , (n-1)*inc , 3*jstep
1536      ja = istart + jjj
1537*
1538*  "transverse" loop
1539*  -----------------
1540      do 115 nu = 1 , inq
1541      jb = ja + jstepl
1542      if (jb.lt.istart) jb = jb + ninc
1543      jc = jb + jstepl
1544      if (jc.lt.istart) jc = jc + ninc
1545      j = 0
1546*
1547*  loop across transforms
1548*  ----------------------
1549#ifdef OLD_CRAY
1550cdir$ ivdep, shortloop
1551#endif
1552      do 110 l = 1 , nvex
1553      ajb = a(jb+j)
1554      ajc = a(jc+j)
1555      t1 = ajb + ajc
1556      aja = a(ja+j)
1557      t2 = aja - 0.5_dp * t1
1558      t3 = c1 * ( ajb - ajc )
1559      bjb = b(jb+j)
1560      bjc = b(jc+j)
1561      u1 = bjb + bjc
1562      bja = b(ja+j)
1563      u2 = bja - 0.5_dp * u1
1564      u3 = c1 * ( bjb - bjc )
1565      a(ja+j) = aja + t1
1566      b(ja+j) = bja + u1
1567      a(jb+j) = t2 - u3
1568      b(jb+j) = u2 + t3
1569      a(jc+j) = t2 + u3
1570      b(jc+j) = u2 - t3
1571      j = j + jump
1572  110 continue
1573      ja = ja + jstepx
1574      if (ja.lt.istart) ja = ja + ninc
1575  115 continue
1576  120 continue
1577*
1578*  finished if n3 = 3
1579*  ------------------
1580      if (n3.eq.3) go to 490
1581      kk = 2 * la
1582*
1583*  loop on nonzero k
1584*  -----------------
1585      do 150 k = ink , jstep-ink , ink
1586      co1 = trigs(kk+1)
1587      si1 = s*trigs(kk+2)
1588      co2 = trigs(2*kk+1)
1589      si2 = s*trigs(2*kk+2)
1590*
1591*  loop along transform
1592*  --------------------
1593      do 140 jjj = k , (n-1)*inc , 3*jstep
1594      ja = istart + jjj
1595*
1596*  "transverse" loop
1597*  -----------------
1598      do 135 nu = 1 , inq
1599      jb = ja + jstepl
1600      if (jb.lt.istart) jb = jb + ninc
1601      jc = jb + jstepl
1602      if (jc.lt.istart) jc = jc + ninc
1603      j = 0
1604*
1605*  loop across transforms
1606*  ----------------------
1607#ifdef OLD_CRAY
1608cdir$ ivdep, shortloop
1609#endif
1610      do 130 l = 1 , nvex
1611      ajb = a(jb+j)
1612      ajc = a(jc+j)
1613      t1 = ajb + ajc
1614      aja = a(ja+j)
1615      t2 = aja - 0.5_dp * t1
1616      t3 = c1 * ( ajb - ajc )
1617      bjb = b(jb+j)
1618      bjc = b(jc+j)
1619      u1 = bjb + bjc
1620      bja = b(ja+j)
1621      u2 = bja - 0.5_dp * u1
1622      u3 = c1 * ( bjb - bjc )
1623      a(ja+j) = aja + t1
1624      b(ja+j) = bja + u1
1625      a(jb+j) = co1*(t2-u3) - si1*(u2+t3)
1626      b(jb+j) = si1*(t2-u3) + co1*(u2+t3)
1627      a(jc+j) = co2*(t2+u3) - si2*(u2-t3)
1628      b(jc+j) = si2*(t2+u3) + co2*(u2-t3)
1629      j = j + jump
1630  130 continue
1631*-----( end of loop across transforms )
1632      ja = ja + jstepx
1633      if (ja.lt.istart) ja = ja + ninc
1634  135 continue
1635  140 continue
1636*-----( end of loop along transforms )
1637      kk = kk + 2*la
1638  150 continue
1639*-----( end of loop on nonzero k )
1640      la = 3*la
1641  160 continue
1642*-----( end of loop on type I radix-3 passes)
1643*
1644*  loop on type II radix-3 passes
1645*  ------------------------------
1646  400 continue
1647*
1648      do 480 ipass = mh+1 , m
1649      jstep = (n*inc) / (3*la)
1650      jstepl = jstep - ninc
1651      laincl = la*ink - ninc
1652*
1653*  k=0 loop (no twiddle factors)
1654*  -----------------------------
1655      do 430 ll = 0 , (la-1)*ink , 3*jstep
1656*
1657      do 420 jjj = ll , (n-1)*inc , 3*la*ink
1658      ja = istart + jjj
1659*
1660*  "transverse" loop
1661*  -----------------
1662      do 415 nu = 1 , inq
1663      jb = ja + jstepl
1664      if (jb.lt.istart) jb = jb + ninc
1665      jc = jb + jstepl
1666      if (jc.lt.istart) jc = jc + ninc
1667      jd = ja + laincl
1668      if (jd.lt.istart) jd = jd + ninc
1669      je = jd + jstepl
1670      if (je.lt.istart) je = je + ninc
1671      jf = je + jstepl
1672      if (jf.lt.istart) jf = jf + ninc
1673      jg = jd + laincl
1674      if (jg.lt.istart) jg = jg + ninc
1675      jh = jg + jstepl
1676      if (jh.lt.istart) jh = jh + ninc
1677      ji = jh + jstepl
1678      if (ji.lt.istart) ji = ji + ninc
1679      j = 0
1680*
1681*  loop across transforms
1682*  ----------------------
1683#ifdef OLD_CRAY
1684cdir$ ivdep, shortloop
1685#endif
1686      do 410 l = 1 , nvex
1687      ajb = a(jb+j)
1688      ajc = a(jc+j)
1689      t1 = ajb + ajc
1690      aja = a(ja+j)
1691      t2 = aja - 0.5_dp * t1
1692      t3 = c1 * ( ajb - ajc )
1693      ajd = a(jd+j)
1694      ajb =  ajd
1695      bjb = b(jb+j)
1696      bjc = b(jc+j)
1697      u1 = bjb + bjc
1698      bja = b(ja+j)
1699      u2 = bja - 0.5_dp * u1
1700      u3 = c1 * ( bjb - bjc )
1701      bjd = b(jd+j)
1702      bjb =  bjd
1703      a(ja+j) = aja + t1
1704      b(ja+j) = bja + u1
1705      a(jd+j) = t2 - u3
1706      b(jd+j) = u2 + t3
1707      ajc =  t2 + u3
1708      bjc =  u2 - t3
1709*----------------------
1710      aje = a(je+j)
1711      ajf = a(jf+j)
1712      t1 = aje + ajf
1713      t2 = ajb - 0.5_dp * t1
1714      t3 = c1 * ( aje - ajf )
1715      ajh = a(jh+j)
1716      ajf =  ajh
1717      bje = b(je+j)
1718      bjf = b(jf+j)
1719      u1 = bje + bjf
1720      u2 = bjb - 0.5_dp * u1
1721      u3 = c1 * ( bje - bjf )
1722      bjh = b(jh+j)
1723      bjf =  bjh
1724      a(jb+j) = ajb + t1
1725      b(jb+j) = bjb + u1
1726      a(je+j) = t2 - u3
1727      b(je+j) = u2 + t3
1728      a(jh+j) = t2 + u3
1729      b(jh+j) = u2 - t3
1730*----------------------
1731      aji = a(ji+j)
1732      t1 = ajf + aji
1733      ajg = a(jg+j)
1734      t2 = ajg - 0.5_dp * t1
1735      t3 = c1 * ( ajf - aji )
1736      t1 = ajg + t1
1737      a(jg+j) = ajc
1738      bji = b(ji+j)
1739      u1 = bjf + bji
1740      bjg = b(jg+j)
1741      u2 = bjg - 0.5_dp * u1
1742      u3 = c1 * ( bjf - bji )
1743      u1 = bjg + u1
1744      b(jg+j) = bjc
1745      a(jc+j) = t1
1746      b(jc+j) = u1
1747      a(jf+j) = t2 - u3
1748      b(jf+j) = u2 + t3
1749      a(ji+j) = t2 + u3
1750      b(ji+j) = u2 - t3
1751      j = j + jump
1752  410 continue
1753*-----( end of loop across transforms )
1754      ja = ja + jstepx
1755      if (ja.lt.istart) ja = ja + ninc
1756  415 continue
1757  420 continue
1758  430 continue
1759*-----( end of double loop for k=0 )
1760*
1761*  finished if last pass
1762*  ---------------------
1763      if (ipass.eq.m) go to 490
1764*
1765      kk = 2*la
1766*
1767*     loop on nonzero k
1768*     -----------------
1769      do 470 k = ink , jstep-ink , ink
1770      co1 = trigs(kk+1)
1771      si1 = s*trigs(kk+2)
1772      co2 = trigs(2*kk+1)
1773      si2 = s*trigs(2*kk+2)
1774*
1775*  double loop along first transform in block
1776*  ------------------------------------------
1777      do 460 ll = k , (la-1)*ink , 3*jstep
1778*
1779      do 450 jjj = ll , (n-1)*inc , 3*la*ink
1780      ja = istart + jjj
1781*
1782*  "transverse" loop
1783*  -----------------
1784      do 445 nu = 1 , inq
1785      jb = ja + jstepl
1786      if (jb.lt.istart) jb = jb + ninc
1787      jc = jb + jstepl
1788      if (jc.lt.istart) jc = jc + ninc
1789      jd = ja + laincl
1790      if (jd.lt.istart) jd = jd + ninc
1791      je = jd + jstepl
1792      if (je.lt.istart) je = je + ninc
1793      jf = je + jstepl
1794      if (jf.lt.istart) jf = jf + ninc
1795      jg = jd + laincl
1796      if (jg.lt.istart) jg = jg + ninc
1797      jh = jg + jstepl
1798      if (jh.lt.istart) jh = jh + ninc
1799      ji = jh + jstepl
1800      if (ji.lt.istart) ji = ji + ninc
1801      j = 0
1802*
1803*  loop across transforms
1804*  ----------------------
1805#ifdef OLD_CRAY
1806cdir$ ivdep, shortloop
1807#endif
1808      do 440 l = 1 , nvex
1809      ajb = a(jb+j)
1810      ajc = a(jc+j)
1811      t1 = ajb + ajc
1812      aja = a(ja+j)
1813      t2 = aja - 0.5_dp * t1
1814      t3 = c1 * ( ajb - ajc )
1815      ajd = a(jd+j)
1816      ajb =  ajd
1817      bjb = b(jb+j)
1818      bjc = b(jc+j)
1819      u1 = bjb + bjc
1820      bja = b(ja+j)
1821      u2 = bja - 0.5_dp * u1
1822      u3 = c1 * ( bjb - bjc )
1823      bjd = b(jd+j)
1824      bjb =  bjd
1825      a(ja+j) = aja + t1
1826      b(ja+j) = bja + u1
1827      a(jd+j) = co1*(t2-u3) - si1*(u2+t3)
1828      b(jd+j) = si1*(t2-u3) + co1*(u2+t3)
1829      ajc =  co2*(t2+u3) - si2*(u2-t3)
1830      bjc =  si2*(t2+u3) + co2*(u2-t3)
1831*----------------------
1832      aje = a(je+j)
1833      ajf = a(jf+j)
1834      t1 = aje + ajf
1835      t2 = ajb - 0.5_dp * t1
1836      t3 = c1 * ( aje - ajf )
1837      ajh = a(jh+j)
1838      ajf =  ajh
1839      bje = b(je+j)
1840      bjf = b(jf+j)
1841      u1 = bje + bjf
1842      u2 = bjb - 0.5_dp * u1
1843      u3 = c1 * ( bje - bjf )
1844      bjh = b(jh+j)
1845      bjf =  bjh
1846      a(jb+j) = ajb + t1
1847      b(jb+j) = bjb + u1
1848      a(je+j) = co1*(t2-u3) - si1*(u2+t3)
1849      b(je+j) = si1*(t2-u3) + co1*(u2+t3)
1850      a(jh+j) = co2*(t2+u3) - si2*(u2-t3)
1851      b(jh+j) = si2*(t2+u3) + co2*(u2-t3)
1852*----------------------
1853      aji = a(ji+j)
1854      t1 = ajf + aji
1855      ajg = a(jg+j)
1856      t2 = ajg - 0.5_dp * t1
1857      t3 = c1 * ( ajf - aji )
1858      t1 = ajg + t1
1859      a(jg+j) = ajc
1860      bji = b(ji+j)
1861      u1 = bjf + bji
1862      bjg = b(jg+j)
1863      u2 = bjg - 0.5_dp * u1
1864      u3 = c1 * ( bjf - bji )
1865      u1 = bjg + u1
1866      b(jg+j) = bjc
1867      a(jc+j) = t1
1868      b(jc+j) = u1
1869      a(jf+j) = co1*(t2-u3) - si1*(u2+t3)
1870      b(jf+j) = si1*(t2-u3) + co1*(u2+t3)
1871      a(ji+j) = co2*(t2+u3) - si2*(u2-t3)
1872      b(ji+j) = si2*(t2+u3) + co2*(u2-t3)
1873      j = j + jump
1874  440 continue
1875*-----(end of loop across transforms)
1876      ja = ja + jstepx
1877      if (ja.lt.istart) ja = ja + ninc
1878  445 continue
1879  450 continue
1880  460 continue
1881*-----( end of double loop for this k )
1882      kk = kk + 2*la
1883  470 continue
1884*-----( end of loop over values of k )
1885      la = 3*la
1886  480 continue
1887*-----( end of loop on type II radix-3 passes )
1888*-----( nvex transforms completed)
1889  490 continue
1890      istart = istart + nvex * jump
1891  500 continue
1892*-----( end of loop on blocks of transforms )
1893*
1894      return
1895      end subroutine gpfa3f
1896
1897!*******************************************************************
1898
1899*     fortran version of *gpfa5* -
1900*     radix-5 section of self-sorting, in-place,
1901*        generalized pfa
1902*
1903*-------------------------------------------------------------------
1904*
1905      subroutine gpfa5f(a,b,trigs,inc,jump,n,mm,lot,isign)
1906
1907      real(dp)     ::     a(*), b(*)
1908      real(dp)     ::     trigs(*)
1909      integer inc, jump, n, mm, lot, isign
1910
1911      real(dp), parameter :: sin36 = 0.587785252292473_dp,
1912     $                       sin72 = 0.951056516295154_dp,
1913     $                       qrt5  = 0.559016994374947_dp
1914      integer, parameter  :: lvr = 128
1915
1916      integer      ::  inq, jstepx, ninc, ink, mu, m, mh,
1917     $                 nblox, n5, left, istart, nb, nvex, la,
1918     $                 ipass, jstep, jstepl, kk, k, jjj, ja,
1919     $                 nu, jb, jc, jd, je, j, laincl, ll, l,
1920     $                 jf, jg, jh, ji, jj, jk, jl, jm, jn, jo,
1921     $                 jp, jq, jr, js, jt, ju, jv, jw, jx, jy
1922
1923      real(dp) ::  s, c1, c2, c3, co1, si1, co2, si2, co3,
1924     $                 si3, co4, si4, ajb, aje, t1, ajc, ajd,
1925     $                 t2, t3, t4, t5, t6, aja, t7, t8, t9, t10,
1926     $                 t11, bjb, bje, u1, bjc, bjd, u2, u3, u4,
1927     $                 u5, u6, bja, u7, u8, u9, u10, u11, ajf,
1928     $                 ajk, bjf, bjk, ajg, ajj, ajh, aji, ajl,
1929     $                 ajq, bjg, bjj, bjh, bji, bjl, bjq, ajo,
1930     $                 ajm, ajn, ajr, ajw, bjo, bjm, bjn, bjr,
1931     $                 bjw, ajt, ajs, ajx, ajp, ax, bjt, bjs,
1932     $                 bjx, bjp, bx, ajv, ajy, aju, bjv, bjy,
1933     $                 bju
1934*
1935*     ***************************************************************
1936*     *                                                             *
1937*     *  N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. *
1938*     *  RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE *
1939*     *  (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER.      *
1940*     *                                                             *
1941*     ***************************************************************
1942*
1943      n5 = 5 ** mm
1944      inq = n / n5
1945      jstepx = (n5-n) * inc
1946      ninc = n * inc
1947      ink = inc * inq
1948      mu = mod(inq,5)
1949      if (isign.eq.-1) mu = 5 - mu
1950*
1951      m = mm
1952      mh = (m+1)/2
1953      s = real(isign,kind=dp)
1954      c1 = qrt5
1955      c2 = sin72
1956      c3 = sin36
1957      if (mu.eq.2.or.mu.eq.3) then
1958         c1 = -c1
1959         c2 = sin36
1960         c3 = sin72
1961      endif
1962      if (mu.eq.3.or.mu.eq.4) c2 = -c2
1963      if (mu.eq.2.or.mu.eq.4) c3 = -c3
1964*
1965      nblox = 1 + (lot-1)/lvr
1966      left = lot
1967      s = real(isign,kind=dp)
1968      istart = 1
1969*
1970*  loop on blocks of lvr transforms
1971*  --------------------------------
1972      do 500 nb = 1 , nblox
1973*
1974      if (left.le.lvr) then
1975         nvex = left
1976      else if (left.lt.(2*lvr)) then
1977         nvex = left/2
1978         nvex = nvex + mod(nvex,2)
1979      else
1980         nvex = lvr
1981      endif
1982      left = left - nvex
1983*
1984      la = 1
1985*
1986*  loop on type I radix-5 passes
1987*  -----------------------------
1988      do 160 ipass = 1 , mh
1989      jstep = (n*inc) / (5*la)
1990      jstepl = jstep - ninc
1991      kk = 0
1992*
1993*  loop on k
1994*  ---------
1995      do 150 k = 0 , jstep-ink , ink
1996*
1997      if (k.gt.0) then
1998      co1 = trigs(kk+1)
1999      si1 = s*trigs(kk+2)
2000      co2 = trigs(2*kk+1)
2001      si2 = s*trigs(2*kk+2)
2002      co3 = trigs(3*kk+1)
2003      si3 = s*trigs(3*kk+2)
2004      co4 = trigs(4*kk+1)
2005      si4 = s*trigs(4*kk+2)
2006      endif
2007*
2008*  loop along transform
2009*  --------------------
2010      do 140 jjj = k , (n-1)*inc , 5*jstep
2011      ja = istart + jjj
2012*
2013*     "transverse" loop
2014*     -----------------
2015      do 135 nu = 1 , inq
2016      jb = ja + jstepl
2017      if (jb.lt.istart) jb = jb + ninc
2018      jc = jb + jstepl
2019      if (jc.lt.istart) jc = jc + ninc
2020      jd = jc + jstepl
2021      if (jd.lt.istart) jd = jd + ninc
2022      je = jd + jstepl
2023      if (je.lt.istart) je = je + ninc
2024      j = 0
2025*
2026*  loop across transforms
2027*  ----------------------
2028      if (k.eq.0) then
2029*
2030#ifdef OLD_CRAY
2031cdir$ ivdep, shortloop
2032#endif
2033      do 110 l = 1 , nvex
2034      ajb = a(jb+j)
2035      aje = a(je+j)
2036      t1 = ajb + aje
2037      ajc = a(jc+j)
2038      ajd = a(jd+j)
2039      t2 = ajc + ajd
2040      t3 = ajb - aje
2041      t4 = ajc - ajd
2042      t5 = t1 + t2
2043      t6 = c1 * ( t1 - t2 )
2044      aja = a(ja+j)
2045      t7 = aja - 0.25_dp * t5
2046      a(ja+j) = aja + t5
2047      t8 = t7 + t6
2048      t9 = t7 - t6
2049      t10 = c3 * t3 - c2 * t4
2050      t11 = c2 * t3 + c3 * t4
2051      bjb = b(jb+j)
2052      bje = b(je+j)
2053      u1 = bjb + bje
2054      bjc = b(jc+j)
2055      bjd = b(jd+j)
2056      u2 = bjc + bjd
2057      u3 = bjb - bje
2058      u4 = bjc - bjd
2059      u5 = u1 + u2
2060      u6 = c1 * ( u1 - u2 )
2061      bja = b(ja+j)
2062      u7 = bja - 0.25_dp * u5
2063      b(ja+j) = bja + u5
2064      u8 = u7 + u6
2065      u9 = u7 - u6
2066      u10 = c3 * u3 - c2 * u4
2067      u11 = c2 * u3 + c3 * u4
2068      a(jb+j) = t8 - u11
2069      b(jb+j) = u8 + t11
2070      a(je+j) = t8 + u11
2071      b(je+j) = u8 - t11
2072      a(jc+j) = t9 - u10
2073      b(jc+j) = u9 + t10
2074      a(jd+j) = t9 + u10
2075      b(jd+j) = u9 - t10
2076      j = j + jump
2077  110 continue
2078*
2079      else
2080*
2081#ifdef OLD_CRAY
2082cdir$ ivdep, shortloop
2083#endif
2084      do 130 l = 1 , nvex
2085      ajb = a(jb+j)
2086      aje = a(je+j)
2087      t1 = ajb + aje
2088      ajc = a(jc+j)
2089      ajd = a(jd+j)
2090      t2 = ajc + ajd
2091      t3 = ajb - aje
2092      t4 = ajc - ajd
2093      t5 = t1 + t2
2094      t6 = c1 * ( t1 - t2 )
2095      aja = a(ja+j)
2096      t7 = aja - 0.25_dp * t5
2097      a(ja+j) = aja + t5
2098      t8 = t7 + t6
2099      t9 = t7 - t6
2100      t10 = c3 * t3 - c2 * t4
2101      t11 = c2 * t3 + c3 * t4
2102      bjb = b(jb+j)
2103      bje = b(je+j)
2104      u1 = bjb + bje
2105      bjc = b(jc+j)
2106      bjd = b(jd+j)
2107      u2 = bjc + bjd
2108      u3 = bjb - bje
2109      u4 = bjc - bjd
2110      u5 = u1 + u2
2111      u6 = c1 * ( u1 - u2 )
2112      bja = b(ja+j)
2113      u7 = bja - 0.25_dp * u5
2114      b(ja+j) = bja + u5
2115      u8 = u7 + u6
2116      u9 = u7 - u6
2117      u10 = c3 * u3 - c2 * u4
2118      u11 = c2 * u3 + c3 * u4
2119      a(jb+j) = co1*(t8-u11) - si1*(u8+t11)
2120      b(jb+j) = si1*(t8-u11) + co1*(u8+t11)
2121      a(je+j) = co4*(t8+u11) - si4*(u8-t11)
2122      b(je+j) = si4*(t8+u11) + co4*(u8-t11)
2123      a(jc+j) = co2*(t9-u10) - si2*(u9+t10)
2124      b(jc+j) = si2*(t9-u10) + co2*(u9+t10)
2125      a(jd+j) = co3*(t9+u10) - si3*(u9-t10)
2126      b(jd+j) = si3*(t9+u10) + co3*(u9-t10)
2127      j = j + jump
2128  130 continue
2129*
2130      endif
2131*
2132*-----( end of loop across transforms )
2133*
2134      ja = ja + jstepx
2135      if (ja.lt.istart) ja = ja + ninc
2136  135 continue
2137  140 continue
2138*-----( end of loop along transforms )
2139      kk = kk + 2*la
2140  150 continue
2141*-----( end of loop on nonzero k )
2142      la = 5*la
2143  160 continue
2144*-----( end of loop on type I radix-5 passes)
2145*
2146      if (n.eq.5) go to 490
2147*
2148*  loop on type II radix-5 passes
2149*  ------------------------------
2150  400 continue
2151*
2152      do 480 ipass = mh+1 , m
2153      jstep = (n*inc) / (5*la)
2154      jstepl = jstep - ninc
2155      laincl = la * ink - ninc
2156      kk = 0
2157*
2158*     loop on k
2159*     ---------
2160      do 470 k = 0 , jstep-ink , ink
2161*
2162      if (k.gt.0) then
2163      co1 = trigs(kk+1)
2164      si1 = s*trigs(kk+2)
2165      co2 = trigs(2*kk+1)
2166      si2 = s*trigs(2*kk+2)
2167      co3 = trigs(3*kk+1)
2168      si3 = s*trigs(3*kk+2)
2169      co4 = trigs(4*kk+1)
2170      si4 = s*trigs(4*kk+2)
2171      endif
2172*
2173*  double loop along first transform in block
2174*  ------------------------------------------
2175      do 460 ll = k , (la-1)*ink , 5*jstep
2176*
2177      do 450 jjj = ll , (n-1)*inc , 5*la*ink
2178      ja = istart + jjj
2179*
2180*     "transverse" loop
2181*     -----------------
2182      do 445 nu = 1 , inq
2183      jb = ja + jstepl
2184      if (jb.lt.istart) jb = jb + ninc
2185      jc = jb + jstepl
2186      if (jc.lt.istart) jc = jc + ninc
2187      jd = jc + jstepl
2188      if (jd.lt.istart) jd = jd + ninc
2189      je = jd + jstepl
2190      if (je.lt.istart) je = je + ninc
2191      jf = ja + laincl
2192      if (jf.lt.istart) jf = jf + ninc
2193      jg = jf + jstepl
2194      if (jg.lt.istart) jg = jg + ninc
2195      jh = jg + jstepl
2196      if (jh.lt.istart) jh = jh + ninc
2197      ji = jh + jstepl
2198      if (ji.lt.istart) ji = ji + ninc
2199      jj = ji + jstepl
2200      if (jj.lt.istart) jj = jj + ninc
2201      jk = jf + laincl
2202      if (jk.lt.istart) jk = jk + ninc
2203      jl = jk + jstepl
2204      if (jl.lt.istart) jl = jl + ninc
2205      jm = jl + jstepl
2206      if (jm.lt.istart) jm = jm + ninc
2207      jn = jm + jstepl
2208      if (jn.lt.istart) jn = jn + ninc
2209      jo = jn + jstepl
2210      if (jo.lt.istart) jo = jo + ninc
2211      jp = jk + laincl
2212      if (jp.lt.istart) jp = jp + ninc
2213      jq = jp + jstepl
2214      if (jq.lt.istart) jq = jq + ninc
2215      jr = jq + jstepl
2216      if (jr.lt.istart) jr = jr + ninc
2217      js = jr + jstepl
2218      if (js.lt.istart) js = js + ninc
2219      jt = js + jstepl
2220      if (jt.lt.istart) jt = jt + ninc
2221      ju = jp + laincl
2222      if (ju.lt.istart) ju = ju + ninc
2223      jv = ju + jstepl
2224      if (jv.lt.istart) jv = jv + ninc
2225      jw = jv + jstepl
2226      if (jw.lt.istart) jw = jw + ninc
2227      jx = jw + jstepl
2228      if (jx.lt.istart) jx = jx + ninc
2229      jy = jx + jstepl
2230      if (jy.lt.istart) jy = jy + ninc
2231      j = 0
2232*
2233*  loop across transforms
2234*  ----------------------
2235      if (k.eq.0) then
2236*
2237#ifdef OLD_CRAY
2238cdir$ ivdep, shortloop
2239#endif
2240      do 410 l = 1 , nvex
2241      ajb = a(jb+j)
2242      aje = a(je+j)
2243      t1 = ajb + aje
2244      ajc = a(jc+j)
2245      ajd = a(jd+j)
2246      t2 = ajc + ajd
2247      t3 = ajb - aje
2248      t4 = ajc - ajd
2249      ajf = a(jf+j)
2250      ajb =  ajf
2251      t5 = t1 + t2
2252      t6 = c1 * ( t1 - t2 )
2253      aja = a(ja+j)
2254      t7 = aja - 0.25_dp * t5
2255      a(ja+j) = aja + t5
2256      t8 = t7 + t6
2257      t9 = t7 - t6
2258      ajk = a(jk+j)
2259      ajc =  ajk
2260      t10 = c3 * t3 - c2 * t4
2261      t11 = c2 * t3 + c3 * t4
2262      bjb = b(jb+j)
2263      bje = b(je+j)
2264      u1 = bjb + bje
2265      bjc = b(jc+j)
2266      bjd = b(jd+j)
2267      u2 = bjc + bjd
2268      u3 = bjb - bje
2269      u4 = bjc - bjd
2270      bjf = b(jf+j)
2271      bjb =  bjf
2272      u5 = u1 + u2
2273      u6 = c1 * ( u1 - u2 )
2274      bja = b(ja+j)
2275      u7 = bja - 0.25_dp * u5
2276      b(ja+j) = bja + u5
2277      u8 = u7 + u6
2278      u9 = u7 - u6
2279      bjk = b(jk+j)
2280      bjc =  bjk
2281      u10 = c3 * u3 - c2 * u4
2282      u11 = c2 * u3 + c3 * u4
2283      a(jf+j) = t8 - u11
2284      b(jf+j) = u8 + t11
2285      aje =  t8 + u11
2286      bje =  u8 - t11
2287      a(jk+j) = t9 - u10
2288      b(jk+j) = u9 + t10
2289      ajd =  t9 + u10
2290      bjd =  u9 - t10
2291*----------------------
2292      ajg = a(jg+j)
2293      ajj = a(jj+j)
2294      t1 = ajg + ajj
2295      ajh = a(jh+j)
2296      aji = a(ji+j)
2297      t2 = ajh + aji
2298      t3 = ajg - ajj
2299      t4 = ajh - aji
2300      ajl = a(jl+j)
2301      ajh =  ajl
2302      t5 = t1 + t2
2303      t6 = c1 * ( t1 - t2 )
2304      t7 = ajb - 0.25_dp * t5
2305      a(jb+j) = ajb + t5
2306      t8 = t7 + t6
2307      t9 = t7 - t6
2308      ajq = a(jq+j)
2309      aji =  ajq
2310      t10 = c3 * t3 - c2 * t4
2311      t11 = c2 * t3 + c3 * t4
2312      bjg = b(jg+j)
2313      bjj = b(jj+j)
2314      u1 = bjg + bjj
2315      bjh = b(jh+j)
2316      bji = b(ji+j)
2317      u2 = bjh + bji
2318      u3 = bjg - bjj
2319      u4 = bjh - bji
2320      bjl = b(jl+j)
2321      bjh =  bjl
2322      u5 = u1 + u2
2323      u6 = c1 * ( u1 - u2 )
2324      u7 = bjb - 0.25_dp * u5
2325      b(jb+j) = bjb + u5
2326      u8 = u7 + u6
2327      u9 = u7 - u6
2328      bjq = b(jq+j)
2329      bji =  bjq
2330      u10 = c3 * u3 - c2 * u4
2331      u11 = c2 * u3 + c3 * u4
2332      a(jg+j) = t8 - u11
2333      b(jg+j) = u8 + t11
2334      ajj =  t8 + u11
2335      bjj =  u8 - t11
2336      a(jl+j) = t9 - u10
2337      b(jl+j) = u9 + t10
2338      a(jq+j) = t9 + u10
2339      b(jq+j) = u9 - t10
2340*----------------------
2341      ajo = a(jo+j)
2342      t1 = ajh + ajo
2343      ajm = a(jm+j)
2344      ajn = a(jn+j)
2345      t2 = ajm + ajn
2346      t3 = ajh - ajo
2347      t4 = ajm - ajn
2348      ajr = a(jr+j)
2349      ajn =  ajr
2350      t5 = t1 + t2
2351      t6 = c1 * ( t1 - t2 )
2352      t7 = ajc - 0.25_dp * t5
2353      a(jc+j) = ajc + t5
2354      t8 = t7 + t6
2355      t9 = t7 - t6
2356      ajw = a(jw+j)
2357      ajo =  ajw
2358      t10 = c3 * t3 - c2 * t4
2359      t11 = c2 * t3 + c3 * t4
2360      bjo = b(jo+j)
2361      u1 = bjh + bjo
2362      bjm = b(jm+j)
2363      bjn = b(jn+j)
2364      u2 = bjm + bjn
2365      u3 = bjh - bjo
2366      u4 = bjm - bjn
2367      bjr = b(jr+j)
2368      bjn =  bjr
2369      u5 = u1 + u2
2370      u6 = c1 * ( u1 - u2 )
2371      u7 = bjc - 0.25_dp * u5
2372      b(jc+j) = bjc + u5
2373      u8 = u7 + u6
2374      u9 = u7 - u6
2375      bjw = b(jw+j)
2376      bjo =  bjw
2377      u10 = c3 * u3 - c2 * u4
2378      u11 = c2 * u3 + c3 * u4
2379      a(jh+j) = t8 - u11
2380      b(jh+j) = u8 + t11
2381      a(jw+j) = t8 + u11
2382      b(jw+j) = u8 - t11
2383      a(jm+j) = t9 - u10
2384      b(jm+j) = u9 + t10
2385      a(jr+j) = t9 + u10
2386      b(jr+j) = u9 - t10
2387*----------------------
2388      ajt = a(jt+j)
2389      t1 = aji + ajt
2390      ajs = a(js+j)
2391      t2 = ajn + ajs
2392      t3 = aji - ajt
2393      t4 = ajn - ajs
2394      ajx = a(jx+j)
2395      ajt =  ajx
2396      t5 = t1 + t2
2397      t6 = c1 * ( t1 - t2 )
2398      ajp = a(jp+j)
2399      t7 = ajp - 0.25_dp * t5
2400      ax = ajp + t5
2401      t8 = t7 + t6
2402      t9 = t7 - t6
2403      a(jp+j) = ajd
2404      t10 = c3 * t3 - c2 * t4
2405      t11 = c2 * t3 + c3 * t4
2406      a(jd+j) = ax
2407      bjt = b(jt+j)
2408      u1 = bji + bjt
2409      bjs = b(js+j)
2410      u2 = bjn + bjs
2411      u3 = bji - bjt
2412      u4 = bjn - bjs
2413      bjx = b(jx+j)
2414      bjt =  bjx
2415      u5 = u1 + u2
2416      u6 = c1 * ( u1 - u2 )
2417      bjp = b(jp+j)
2418      u7 = bjp - 0.25_dp * u5
2419      bx = bjp + u5
2420      u8 = u7 + u6
2421      u9 = u7 - u6
2422      b(jp+j) = bjd
2423      u10 = c3 * u3 - c2 * u4
2424      u11 = c2 * u3 + c3 * u4
2425      b(jd+j) = bx
2426      a(ji+j) = t8 - u11
2427      b(ji+j) = u8 + t11
2428      a(jx+j) = t8 + u11
2429      b(jx+j) = u8 - t11
2430      a(jn+j) = t9 - u10
2431      b(jn+j) = u9 + t10
2432      a(js+j) = t9 + u10
2433      b(js+j) = u9 - t10
2434*----------------------
2435      ajv = a(jv+j)
2436      ajy = a(jy+j)
2437      t1 = ajv + ajy
2438      t2 = ajo + ajt
2439      t3 = ajv - ajy
2440      t4 = ajo - ajt
2441      a(jv+j) = ajj
2442      t5 = t1 + t2
2443      t6 = c1 * ( t1 - t2 )
2444      aju = a(ju+j)
2445      t7 = aju - 0.25_dp * t5
2446      ax = aju + t5
2447      t8 = t7 + t6
2448      t9 = t7 - t6
2449      a(ju+j) = aje
2450      t10 = c3 * t3 - c2 * t4
2451      t11 = c2 * t3 + c3 * t4
2452      a(je+j) = ax
2453      bjv = b(jv+j)
2454      bjy = b(jy+j)
2455      u1 = bjv + bjy
2456      u2 = bjo + bjt
2457      u3 = bjv - bjy
2458      u4 = bjo - bjt
2459      b(jv+j) = bjj
2460      u5 = u1 + u2
2461      u6 = c1 * ( u1 - u2 )
2462      bju = b(ju+j)
2463      u7 = bju - 0.25_dp * u5
2464      bx = bju + u5
2465      u8 = u7 + u6
2466      u9 = u7 - u6
2467      b(ju+j) = bje
2468      u10 = c3 * u3 - c2 * u4
2469      u11 = c2 * u3 + c3 * u4
2470      b(je+j) = bx
2471      a(jj+j) = t8 - u11
2472      b(jj+j) = u8 + t11
2473      a(jy+j) = t8 + u11
2474      b(jy+j) = u8 - t11
2475      a(jo+j) = t9 - u10
2476      b(jo+j) = u9 + t10
2477      a(jt+j) = t9 + u10
2478      b(jt+j) = u9 - t10
2479      j = j + jump
2480  410 continue
2481*
2482      else
2483*
2484#ifdef OLD_CRAY
2485cdir$ ivdep, shortloop
2486#endif
2487      do 440 l = 1 , nvex
2488      ajb = a(jb+j)
2489      aje = a(je+j)
2490      t1 = ajb + aje
2491      ajc = a(jc+j)
2492      ajd = a(jd+j)
2493      t2 = ajc + ajd
2494      t3 = ajb - aje
2495      t4 = ajc - ajd
2496      ajf = a(jf+j)
2497      ajb =  ajf
2498      t5 = t1 + t2
2499      t6 = c1 * ( t1 - t2 )
2500      aja = a(ja+j)
2501      t7 = aja - 0.25_dp * t5
2502      a(ja+j) = aja + t5
2503      t8 = t7 + t6
2504      t9 = t7 - t6
2505      ajk = a(jk+j)
2506      ajc =  ajk
2507      t10 = c3 * t3 - c2 * t4
2508      t11 = c2 * t3 + c3 * t4
2509      bjb = b(jb+j)
2510      bje = b(je+j)
2511      u1 = bjb + bje
2512      bjc = b(jc+j)
2513      bjd = b(jd+j)
2514      u2 = bjc + bjd
2515      u3 = bjb - bje
2516      u4 = bjc - bjd
2517      bjf = b(jf+j)
2518      bjb =  bjf
2519      u5 = u1 + u2
2520      u6 = c1 * ( u1 - u2 )
2521      bja = b(ja+j)
2522      u7 = bja - 0.25_dp * u5
2523      b(ja+j) = bja + u5
2524      u8 = u7 + u6
2525      u9 = u7 - u6
2526      bjk = b(jk+j)
2527      bjc =  bjk
2528      u10 = c3 * u3 - c2 * u4
2529      u11 = c2 * u3 + c3 * u4
2530      a(jf+j) = co1*(t8-u11) - si1*(u8+t11)
2531      b(jf+j) = si1*(t8-u11) + co1*(u8+t11)
2532      aje =  co4*(t8+u11) - si4*(u8-t11)
2533      bje =  si4*(t8+u11) + co4*(u8-t11)
2534      a(jk+j) = co2*(t9-u10) - si2*(u9+t10)
2535      b(jk+j) = si2*(t9-u10) + co2*(u9+t10)
2536      ajd =  co3*(t9+u10) - si3*(u9-t10)
2537      bjd =  si3*(t9+u10) + co3*(u9-t10)
2538*----------------------
2539      ajg = a(jg+j)
2540      ajj = a(jj+j)
2541      t1 = ajg + ajj
2542      ajh = a(jh+j)
2543      aji = a(ji+j)
2544      t2 = ajh + aji
2545      t3 = ajg - ajj
2546      t4 = ajh - aji
2547      ajl = a(jl+j)
2548      ajh =  ajl
2549      t5 = t1 + t2
2550      t6 = c1 * ( t1 - t2 )
2551      t7 = ajb - 0.25_dp * t5
2552      a(jb+j) = ajb + t5
2553      t8 = t7 + t6
2554      t9 = t7 - t6
2555      ajq = a(jq+j)
2556      aji =  ajq
2557      t10 = c3 * t3 - c2 * t4
2558      t11 = c2 * t3 + c3 * t4
2559      bjg = b(jg+j)
2560      bjj = b(jj+j)
2561      u1 = bjg + bjj
2562      bjh = b(jh+j)
2563      bji = b(ji+j)
2564      u2 = bjh + bji
2565      u3 = bjg - bjj
2566      u4 = bjh - bji
2567      bjl = b(jl+j)
2568      bjh =  bjl
2569      u5 = u1 + u2
2570      u6 = c1 * ( u1 - u2 )
2571      u7 = bjb - 0.25_dp * u5
2572      b(jb+j) = bjb + u5
2573      u8 = u7 + u6
2574      u9 = u7 - u6
2575      bjq = b(jq+j)
2576      bji =  bjq
2577      u10 = c3 * u3 - c2 * u4
2578      u11 = c2 * u3 + c3 * u4
2579      a(jg+j) = co1*(t8-u11) - si1*(u8+t11)
2580      b(jg+j) = si1*(t8-u11) + co1*(u8+t11)
2581      ajj =  co4*(t8+u11) - si4*(u8-t11)
2582      bjj =  si4*(t8+u11) + co4*(u8-t11)
2583      a(jl+j) = co2*(t9-u10) - si2*(u9+t10)
2584      b(jl+j) = si2*(t9-u10) + co2*(u9+t10)
2585      a(jq+j) = co3*(t9+u10) - si3*(u9-t10)
2586      b(jq+j) = si3*(t9+u10) + co3*(u9-t10)
2587*----------------------
2588      ajo = a(jo+j)
2589      t1 = ajh + ajo
2590      ajm = a(jm+j)
2591      ajn = a(jn+j)
2592      t2 = ajm + ajn
2593      t3 = ajh - ajo
2594      t4 = ajm - ajn
2595      ajr = a(jr+j)
2596      ajn =  ajr
2597      t5 = t1 + t2
2598      t6 = c1 * ( t1 - t2 )
2599      t7 = ajc - 0.25_dp * t5
2600      a(jc+j) = ajc + t5
2601      t8 = t7 + t6
2602      t9 = t7 - t6
2603      ajw = a(jw+j)
2604      ajo =  ajw
2605      t10 = c3 * t3 - c2 * t4
2606      t11 = c2 * t3 + c3 * t4
2607      bjo = b(jo+j)
2608      u1 = bjh + bjo
2609      bjm = b(jm+j)
2610      bjn = b(jn+j)
2611      u2 = bjm + bjn
2612      u3 = bjh - bjo
2613      u4 = bjm - bjn
2614      bjr = b(jr+j)
2615      bjn =  bjr
2616      u5 = u1 + u2
2617      u6 = c1 * ( u1 - u2 )
2618      u7 = bjc - 0.25_dp * u5
2619      b(jc+j) = bjc + u5
2620      u8 = u7 + u6
2621      u9 = u7 - u6
2622      bjw = b(jw+j)
2623      bjo =  bjw
2624      u10 = c3 * u3 - c2 * u4
2625      u11 = c2 * u3 + c3 * u4
2626      a(jh+j) = co1*(t8-u11) - si1*(u8+t11)
2627      b(jh+j) = si1*(t8-u11) + co1*(u8+t11)
2628      a(jw+j) = co4*(t8+u11) - si4*(u8-t11)
2629      b(jw+j) = si4*(t8+u11) + co4*(u8-t11)
2630      a(jm+j) = co2*(t9-u10) - si2*(u9+t10)
2631      b(jm+j) = si2*(t9-u10) + co2*(u9+t10)
2632      a(jr+j) = co3*(t9+u10) - si3*(u9-t10)
2633      b(jr+j) = si3*(t9+u10) + co3*(u9-t10)
2634*----------------------
2635      ajt = a(jt+j)
2636      t1 = aji + ajt
2637      ajs = a(js+j)
2638      t2 = ajn + ajs
2639      t3 = aji - ajt
2640      t4 = ajn - ajs
2641      ajx = a(jx+j)
2642      ajt =  ajx
2643      t5 = t1 + t2
2644      t6 = c1 * ( t1 - t2 )
2645      ajp = a(jp+j)
2646      t7 = ajp - 0.25_dp * t5
2647      ax = ajp + t5
2648      t8 = t7 + t6
2649      t9 = t7 - t6
2650      a(jp+j) = ajd
2651      t10 = c3 * t3 - c2 * t4
2652      t11 = c2 * t3 + c3 * t4
2653      a(jd+j) = ax
2654      bjt = b(jt+j)
2655      u1 = bji + bjt
2656      bjs = b(js+j)
2657      u2 = bjn + bjs
2658      u3 = bji - bjt
2659      u4 = bjn - bjs
2660      bjx = b(jx+j)
2661      bjt =  bjx
2662      u5 = u1 + u2
2663      u6 = c1 * ( u1 - u2 )
2664      bjp = b(jp+j)
2665      u7 = bjp - 0.25_dp * u5
2666      bx = bjp + u5
2667      u8 = u7 + u6
2668      u9 = u7 - u6
2669      b(jp+j) = bjd
2670      u10 = c3 * u3 - c2 * u4
2671      u11 = c2 * u3 + c3 * u4
2672      b(jd+j) = bx
2673      a(ji+j) = co1*(t8-u11) - si1*(u8+t11)
2674      b(ji+j) = si1*(t8-u11) + co1*(u8+t11)
2675      a(jx+j) = co4*(t8+u11) - si4*(u8-t11)
2676      b(jx+j) = si4*(t8+u11) + co4*(u8-t11)
2677      a(jn+j) = co2*(t9-u10) - si2*(u9+t10)
2678      b(jn+j) = si2*(t9-u10) + co2*(u9+t10)
2679      a(js+j) = co3*(t9+u10) - si3*(u9-t10)
2680      b(js+j) = si3*(t9+u10) + co3*(u9-t10)
2681*----------------------
2682      ajv = a(jv+j)
2683      ajy = a(jy+j)
2684      t1 = ajv + ajy
2685      t2 = ajo + ajt
2686      t3 = ajv - ajy
2687      t4 = ajo - ajt
2688      a(jv+j) = ajj
2689      t5 = t1 + t2
2690      t6 = c1 * ( t1 - t2 )
2691      aju = a(ju+j)
2692      t7 = aju - 0.25_dp * t5
2693      ax = aju + t5
2694      t8 = t7 + t6
2695      t9 = t7 - t6
2696      a(ju+j) = aje
2697      t10 = c3 * t3 - c2 * t4
2698      t11 = c2 * t3 + c3 * t4
2699      a(je+j) = ax
2700      bjv = b(jv+j)
2701      bjy = b(jy+j)
2702      u1 = bjv + bjy
2703      u2 = bjo + bjt
2704      u3 = bjv - bjy
2705      u4 = bjo - bjt
2706      b(jv+j) = bjj
2707      u5 = u1 + u2
2708      u6 = c1 * ( u1 - u2 )
2709      bju = b(ju+j)
2710      u7 = bju - 0.25_dp * u5
2711      bx = bju + u5
2712      u8 = u7 + u6
2713      u9 = u7 - u6
2714      b(ju+j) = bje
2715      u10 = c3 * u3 - c2 * u4
2716      u11 = c2 * u3 + c3 * u4
2717      b(je+j) = bx
2718      a(jj+j) = co1*(t8-u11) - si1*(u8+t11)
2719      b(jj+j) = si1*(t8-u11) + co1*(u8+t11)
2720      a(jy+j) = co4*(t8+u11) - si4*(u8-t11)
2721      b(jy+j) = si4*(t8+u11) + co4*(u8-t11)
2722      a(jo+j) = co2*(t9-u10) - si2*(u9+t10)
2723      b(jo+j) = si2*(t9-u10) + co2*(u9+t10)
2724      a(jt+j) = co3*(t9+u10) - si3*(u9-t10)
2725      b(jt+j) = si3*(t9+u10) + co3*(u9-t10)
2726      j = j + jump
2727  440 continue
2728*
2729      endif
2730*
2731*-----(end of loop across transforms)
2732*
2733      ja = ja + jstepx
2734      if (ja.lt.istart) ja = ja + ninc
2735  445 continue
2736  450 continue
2737  460 continue
2738*-----( end of double loop for this k )
2739      kk = kk + 2*la
2740  470 continue
2741*-----( end of loop over values of k )
2742      la = 5*la
2743  480 continue
2744*-----( end of loop on type II radix-5 passes )
2745*-----( nvex transforms completed)
2746  490 continue
2747      istart = istart + nvex * jump
2748  500 continue
2749*-----( end of loop on blocks of transforms )
2750*
2751      return
2752      end subroutine gpfa5f
2753
2754      end module m_fft_gpfa
2755