1c $Id$
2c
3c     this module contains the home-made service routines
4c     like matrix manipulations etc.
5c     some of these duplicate blas routines and should be replaced
6c     by them later
7c.... now come the homebrewn
8c..  replaced by blas routine (same calling sequence)
9c     subroutine add1 (a,con,b,nn)
10c     implicit real*8 (a-h,o-z)
11c     dimension a(1), b(1)
12c     n=nn
13c     do 10 i=1,n
14c  10 b(i)=b(i)+con*a(i)
15c     return
16c
17c     end
18c     subroutine tfer (a,b,n)
19c     implicit real*8 (a-h,o-z)
20c     dimension a(1), b(1)
21c
22c           transfer a to b
23c
24c     nn=n
25c     do 10 i=1,nn
26c  10 b(i)=a(i)
27c     return
28c
29c     end
30      subroutine txs_add(a,b,n)
31      implicit double precision (a-h,o-z)
32      dimension a(*),b(*)
33c...  adds a to b, result in b
34c%include '/sys/ins/base.ins.ftn'
35c%include '/sys/ins/vec.ins.ftn'
36c      call vec_$dadd_vector(a,b,n,b)
37      do 100 i=1,n
38        b(i)=b(i)+a(i)
39 100  continue
40      end
41      subroutine txs_add1(a,con,b,n)
42      implicit double precision (a-h,o-z)
43      dimension a(*),b(*)
44      call daxpy(n,con,a,1,b,1)
45      end
46c
47      subroutine tfer (a,b,n)
48      implicit double precision (a-h,o-z)
49      dimension a(*),b(*)
50c
51*      if (n.lt.1000) then
52         do i = 1, n
53            b(i) = a(i)
54         enddo
55*      else
56*         call dcopy(n,a,1,b,1)
57*      endif
58c
59      end
60      subroutine mxmtr(a,b,c,m,n,p)
61c...  this is not a true blas routine- it is our own creation
62c...  this subroutine performs the matrix multiply c=a(trspse)*b
63c     both the storage dimension and the real dimension of the
64c     matrices is given by a(p,m),b(p,n),c(m,n)
65c     it uses an efficient blocking algorithm
66c     note that ingeneral it is more efficient to perform a(tr)b
67c     than a*b. transpose the matrix before if you have to
68      implicit real*8 (a-h,o-z)
69      integer p
70      dimension  a(p,m), b(p,n),c(m,n)
71      parameter (ibl=16)
72c     performs a(tr)*b=c
73      do 500 i=1,m,ibl
74        iend=min0(i+ibl-1,m)
75        do 500 j=1,n,ibl
76          jend=min0(j+ibl-1,n)
77          do 500 i1=i,iend
78            do 500 j1=j,jend
79          sum=0.0d0
80c         sum=vec_$ddot(a(1,i1),b(1,j1),p)
81          sum=ddot(p,a(1,i1),1,b(1,j1),1)
82c         call scalar(a(1,i1),b(1,j1),p,sum)
83c         do 400 k=1,p
84c            sum=sum+a(k,i1)*b(k,j1)
85c400  continue
86       c(i1,j1)=sum
87 500  continue
88      end
89c
90c     subroutine sdiag2v(m,n,a,d,x)
91      subroutine sdiag2 (m,n,a,d,x)
92      implicit real*8 (a-h,o-z)
93      integer*4 f4,h4,l4
94ckwol parameter (mxdim=401)
95ckwol parameter (mxdim=601)
96      parameter (mxdim=2000)
97c
98c
99c      computation of all eigenvalues and eigenvectors of a real
100c      symmetric matrix by the method of qr transformations.
101c      if the euclidean norm of the rows varies   s t r o n g l y
102c      most accurate results may be obtained by permuting rows and
103c      columns to give an arrangement with increasing norms of rows.
104c
105c      two machine constants must be adjusted appropriately,
106c      eps = minimum of all x such that 1+x is greater than 1 on the
107c      e     computer,
108c      tol = inf / eps  with inf = minimum of all positive x represen-
109c            table within the computer.
110c      a dimension statement e(256) may also be changed appropriately.
111c
112c      input
113c
114c      (m)   not larger than mxdim,  corresponding value of the actual
115c            dimension statement a(m,m), d(m), x(m,m),
116c      (n)   not larger than (m), order of the matrix,
117c      (a)   the matrix to be diagonalized, its lower triangle has to
118c            be given as  ((a(i,j), j=1,i), i=1,n),
119c
120c      output
121c
122c      (d)   components d(1), ..., d(n) hold the computed eigenvalues
123c            in ascending sequence. the remaining components of (d) are
124c            unchanged,
125c      (x)   the computed eigenvector corresponding to the j-th eigen-
126c            value is stored as column (x(i,j), i=1,n). the eigenvectors
127c            are normalized and orthogonal to working accuracy. the
128c            remaining entries of (x) are unchanged.
129c
130c      array (a) is unaltered. however, the actual parameters
131c      corresponding to (a) and (x)  may be identical, ''overwriting''
132c      the eigenvectors on (a).
133c
134c      leibniz-rechenzentrum, munich 1965
135c
136c
137      dimension a(m,m), d(m), x(m,m)
138      dimension e(mxdim)
139c
140c     correct adjustment for ieee 64  bit floating  point numbers`
141c       is about (not too sensitive)
142      eps=1.0d-14
143      tol=1.0d-200
144c
145      if (n.eq.1) go to 350
146      if (n.gt.mxdim) then
147         write(*,*) ' dimension too big, increase mxdim in sdiag2'
148         write (*,*) ' max= ',mxdim, ' actual= ',n
149          call txs_error
150c         can be replaced by go to 360 if you do not have "error"
151      end if
152      do 10 i=1,n
153      do 10 j=1,i
154   10 x(i,j)=a(i,j)
155c
156c     householder reduction
157c     simulation of loop do 150 i=n,2,(-1)
158c
159      do 150 ni=2,n
160         ii=n+2-ni
161c
162c     fake loop for recursive address calculation
163c
164      do 150 i=ii,ii
165         l=i-2
166         h=0.0
167         g=x(i,i-1)
168#if 1
169         l4=l
170         if (l4) 140,140,20
171#else
172         if (l) 140,140,20
173#endif
174   20    do 30 k=1,l
175   30    h=h+x(i,k)**2
176         s=h+g*g
177         if (s.ge.tol) go to 40
178         h=0.0
179         go to 140
180#if 1
181   40    h4=h
182         if (h4) 140,140,50
183#else
184   40    if (h) 140,140,50
185#endif
186   50    l=l+1
187         f=g
188         g=dsqrt(s)
189#if 1
190         f4=f
191         if (f4) 70,70,60
192#else
193         if (f) 70,70,60
194#endif
195   60    g=-g
196   70    h=s-f*g
197         x(i,i-1)=f-g
198         f=0.0
199c
200         do 110 j=1,l
201            x(j,i)=x(i,j)/h
202            s=0.0
203c           do 80 k=1,j
204c  80       s=s+x(j,k)*x(i,k)
205c           s=vec_$ddot_i(x(j,1),m,x(i,1),m,j)
206            s=ddot(j,x(j,1),m,x(i,1),m)
207            j1=j+1
208            if (j1.gt.l) go to 100
209c           do 90 k=j1,l
210c  90       s=s+x(k,j)*x(i,k)
211            j1l=l-j1+1
212c           write(*,*) s,j,j1,i,m,j1l
213c           s=s+vec_$ddot_i(x(j1,j),1,x(i,j1),m,j1l)
214            s=s+ddot(j1l,x(j1,j),1,x(i,j1),m)
215  100       e(j)=s/h
216  110    f=f+s*x(j,i)
217c
218         f=f/(h+h)
219c
220c        do 120 j=1,l
221c 120    e(j)=e(j)-f*x(i,j)
222c        call vec_$dmult_add_i(e,1,x(i,1),m,l,-f,e,1)
223         call daxpy(l,-f,x(i,1),m,e,1)
224c
225         do 130 j=1,l
226            f=x(i,j)
227            s=e(j)
228c        do 130 k=1,j
229c 130    x(j,k)=x(j,k)-f*e(k)-x(i,k)*s
230c        call vec_$dmult_add_i(x(j,1),m,e,1,j,-f,x(j,1),m)
231c        call vec_$dmult_add_i(x(j,1),m,x(i,1),m,j,-s,x(j,1),m)
232         call daxpy(j,-f,e,1,x(j,1),m)
233         call daxpy(j,-s,x(i,1),m,x(j,1),m)
234  130    continue
235c
236  140    d(i)=h
237  150 e(i-1)=g
238c
239c     accumulation of transformation matrices
240c
241      d(1)=x(1,1)
242      x(1,1)=1.0
243      do 200 i=2,n
244         l=i-1
245         if (d(i)) 190,190,160
246  160    do 180 j=1,l
247            s=0.0
248c           do 170 k=1,l
249c 170       s=s+x(i,k)*x(k,j)
250c           s=vec_$ddot_i(x(i,1),m,x(1,j),1,l)
251            s=ddot(l,x(i,1),m,x(1,j),1)
252c        do 180 k=1,l
253c 180    x(k,j)=x(k,j)-s*x(k,i)
254c      call vec_$dmult_add(x(1,j),x(1,i),l,-s,x(1,j))
255       call daxpy(l,-s,x(1,i),1,x(1,j),1)
256  180  continue
257  190    d(i)=x(i,i)
258         x(i,i)=1.0
259      do 200 j=1,l
260         x(i,j)=0.0
261  200 x(j,i)=0.0
262c     call vec_$dzero_i(x(i,1),m,l)
263c     call vec_$dzero(x(1,i),l)
264c 200 continue
265c
266c     diagonalization of the tridiagonal matrix
267c
268      b=0.0
269      f=0.0
270      e(n)=0.0
271c
272      do 310 l=1,n
273         h=eps*(dabs(d(l))+dabs(e(l)))
274         if (h.gt.b) b=h
275c
276c     test for splitting
277c
278         do 210 j=l,n
279            if (dabs(e(j)).le.b) go to 220
280  210    continue
281c
282c     test for convergence
283c
284  220    if (j.eq.l) go to 310
285c
286c     shift from upper 2*2 minor
287c
288  230    p=(d(l+1)-d(l))*0.5/e(l)
289         r=dsqrt(p*p+1.0)
290         if (p) 240,250,250
291  240    p=p-r
292         go to 260
293  250    p=p+r
294  260    h=d(l)-e(l)/p
295         do 270 i=l,n
296  270    d(i)=d(i)-h
297         f=f+h
298c
299c     qr transformation
300c
301         p=d(j)
302         c=1.0
303         s=0.0
304c
305c     simulation of loop do 330 i=j-1,l,(-1)
306c
307         j1=j-1
308         do 300 ni=l,j1
309            ii=l+j1-ni
310c
311c     fake loop for recursive address calculation
312c
313         do 300 i=ii,ii
314            g=c*e(i)
315            h=c*p
316c
317c     protection against underflow of exponents
318c
319            if (dabs(p).lt.dabs(e(i))) go to 280
320            c=e(i)/p
321            r=dsqrt(c*c+1.0)
322            e(i+1)=s*p*r
323            s=c/r
324            c=1.0/r
325            go to 290
326  280       c=p/e(i)
327            r=dsqrt(c*c+1.0)
328            e(i+1)=s*e(i)*r
329            s=1.0/r
330            c=c/r
331  290       p=c*d(i)-s*g
332            d(i+1)=h+s*(c*g+s*d(i))
333c        do 300 k=1,n
334c           h=x(k,i+1)
335c           x(k,i+1)=x(k,i)*s+h*c
336c 300    x(k,i)=x(k,i)*c-h*s
337c        call vec_$dcopy(x(1,i),tmp1,n)
338c        call vec_$dcopy(x(1,i+1),tmp2,n)
339c        call vec_$dmult_constant(tmp2,n,c,x(1,i+1))
340c        call vec_$dmult_add(x(1,i+1),x(1,i),n,s,x(1,i+1))
341c        call vec_$dmult_constant(tmp2,n,-s,x(1,i))
342c        call vec_$dmult_add(x(1,i),tmp1,n,c,x(1,i))
343         call drot(n,x(1,i+1),1,x(1,i),1,c,s)
344  300    continue
345c
346         e(l)=s*p
347         d(l)=c*p
348         if (abs(e(l)).gt.b) go to 230
349c
350c     convergence
351c
352  310 d(l)=d(l)+f
353c
354c     ordering of eigenvalues
355c
356      ni=n-1
357      do 340 i=1,ni
358         k=i
359         p=d(i)
360         j1=i+1
361         do 320 j=j1,n
362            if (d(j).ge.p) go to 320
363            k=j
364            p=d(j)
365  320    continue
366         if (k.eq.i) go to 340
367         d(k)=d(i)
368         d(i)=p
369         do 330 j=1,n
370            p=x(j,i)
371            x(j,i)=x(j,k)
372  330    x(j,k)=p
373  340 continue
374      go to 360
375c
376c     special treatment of case n = 1
377c
378  350 d(1)=a(1,1)
379      x(1,1)=1.0
380  360 return
381c
382      end
383c
384c     subroutine sdiag2 (m,n,a,d,x)
385      subroutine sdiag2sc (m,n,a,d,x)
386      implicit real*8 (a-h,o-z)
387c
388c
389c      computation of all eigenvalues and eigenvectors of a real
390c      symmetric matrix by the method of qr transformations.
391c      if the euclidean norm of the rows varies   s t r o n g l y
392c      most accurate results may be obtained by permuting rows and
393c      columns to give an arrangement with increasing norms of rows.
394c
395c      two machine constants must be adjusted appropriately,
396c      eps = minimum of all x such that 1+x is greater than 1 on the
397c      e     computer,
398c      tol = inf / eps  with inf = minimum of all positive x represen-
399c            table within the computer.
400c      a dimension statement e(160) may also be changed appropriately.
401c
402c      input
403c
404c      (m)   not larger than 160,  corresponding value of the actual
405c            dimension statement a(m,m), d(m), x(m,m),
406c      (n)   not larger than (m), order of the matrix,
407c      (a)   the matrix to be diagonalized, its lower triangle has to
408c            be given as  ((a(i,j), j=1,i), i=1,n),
409c
410c      output
411c
412c      (d)   components d(1), ..., d(n) hold the computed eigenvalues
413c            in ascending sequence. the remaining components of (d) are
414c            unchanged,
415c      (x)   the computed eigenvector corresponding to the j-th eigen-
416c            value is stored as column (x(i,j), i=1,n). the eigenvectors
417c            are normalized and orthogonal to working accuracy. the
418c            remaining entries of (x) are unchanged.
419c
420c      array (a) is unaltered. however, the actual parameters
421c      corresponding to (a) and (x)  may be identical, ''overwriting''
422c      the eigenvectors on (a).
423c
424c      leibniz-rechenzentrum, munich 1965
425c
426c
427      dimension a(m,m), d(m), x(m,m)
428      dimension e(160)
429      integer*4 f4,h4,l4
430c
431c     correct adjustment for ieee floating point numbers (64 bits)
432c
433      eps=2.0d-15
434      tol=1.0d-292
435c
436      if (n.eq.1) go to 350
437      do 10 i=1,n
438      do 10 j=1,i
439   10 x(i,j)=a(i,j)
440c
441c     householder reduction
442c     simulation of loop do 150 i=n,2,(-1)
443c
444      do 150 ni=2,n
445         ii=n+2-ni
446c
447c     fake loop for recursive address calculation
448c
449      do 150 i=ii,ii
450         l=i-2
451         h=0.0d0
452         g=x(i,i-1)
453#if 1
454         l4=l
455         if (l4) 140,140,20
456#else
457         if (l) 140,140,20
458#endif
459   20    do 30 k=1,l
460   30    h=h+x(i,k)**2
461         s=h+g*g
462         if (s.ge.tol) go to 40
463         h=0.0d0
464         go to 140
465#if 1
466   40    h4=h
467         if (h4) 140,140,50
468#else
469   40    if (h) 140,140,50
470#endif
471   50    l=l+1
472         f=g
473         g=dsqrt(s)
474#if 1
475         f4=f
476         if (f4) 70,70,60
477#else
478         if (f) 70,70,60
479#endif
480   60    g=-g
481   70    h=s-f*g
482         x(i,i-1)=f-g
483         f=0.0d0
484c
485         do 110 j=1,l
486            x(j,i)=x(i,j)/h
487            s=0.0d0
488            do 80 k=1,j
489   80       s=s+x(j,k)*x(i,k)
490            j1=j+1
491            if (j1.gt.l) go to 100
492            do 90 k=j1,l
493   90       s=s+x(k,j)*x(i,k)
494  100       e(j)=s/h
495  110    f=f+s*x(j,i)
496c
497         f=f/(h+h)
498c
499         do 120 j=1,l
500  120    e(j)=e(j)-f*x(i,j)
501c
502         do 130 j=1,l
503            f=x(i,j)
504            s=e(j)
505         do 130 k=1,j
506  130    x(j,k)=x(j,k)-f*e(k)-x(i,k)*s
507c
508  140    d(i)=h
509  150 e(i-1)=g
510c
511c     accumulation of transformation matrices
512c
513      d(1)=x(1,1)
514      x(1,1)=1.0d0
515      do 200 i=2,n
516         l=i-1
517         if (d(i)) 190,190,160
518  160    do 180 j=1,l
519            s=0.0d0
520            do 170 k=1,l
521  170       s=s+x(i,k)*x(k,j)
522         do 180 k=1,l
523  180    x(k,j)=x(k,j)-s*x(k,i)
524  190    d(i)=x(i,i)
525         x(i,i)=1.0d0
526      do 200 j=1,l
527         x(i,j)=0.0d0
528  200 x(j,i)=0.0d0
529c
530c     diagonalization of the tridiagonal matrix
531c
532      b=0.0d0
533      f=0.0d0
534      e(n)=0.0d0
535c
536      do 310 l=1,n
537         h=eps*(abs(d(l))+abs(e(l)))
538         if (h.gt.b) b=h
539c
540c     test for splitting
541c
542         do 210 j=l,n
543            if (abs(e(j)).le.b) go to 220
544  210    continue
545c
546c     test for convergence
547c
548  220    if (j.eq.l) go to 310
549c
550c     shift from upper 2*2 minor
551c
552  230    p=(d(l+1)-d(l))*0.5d0/e(l)
553         r=dsqrt(p*p+1.0d0)
554         if (p) 240,250,250
555  240    p=p-r
556         go to 260
557  250    p=p+r
558  260    h=d(l)-e(l)/p
559         do 270 i=l,n
560  270    d(i)=d(i)-h
561         f=f+h
562c
563c     qr transformation
564c
565         p=d(j)
566         c=1.0d0
567         s=0.0d0
568c
569c     simulation of loop do 330 i=j-1,l,(-1)
570c
571         j1=j-1
572         do 300 ni=l,j1
573            ii=l+j1-ni
574c
575c     fake loop for recursive address calculation
576c
577         do 300 i=ii,ii
578            g=c*e(i)
579            h=c*p
580c
581c     protection against underflow of exponents
582c
583            if (abs(p).lt.abs(e(i))) go to 280
584            c=e(i)/p
585            r=dsqrt(c*c+1.0d0)
586            e(i+1)=s*p*r
587            s=c/r
588            c=1.0d0/r
589            go to 290
590  280       c=p/e(i)
591            r=dsqrt(c*c+1.0d0)
592            e(i+1)=s*e(i)*r
593            s=1.0d0/r
594            c=c/r
595  290       p=c*d(i)-s*g
596            d(i+1)=h+s*(c*g+s*d(i))
597         do 300 k=1,n
598            h=x(k,i+1)
599            x(k,i+1)=x(k,i)*s+h*c
600  300    x(k,i)=x(k,i)*c-h*s
601c
602         e(l)=s*p
603         d(l)=c*p
604         if (abs(e(l)).gt.b) go to 230
605c
606c     convergence
607c
608  310 d(l)=d(l)+f
609c
610c     ordering of eigenvalues
611c
612      ni=n-1
613      do 340 i=1,ni
614         k=i
615         p=d(i)
616         j1=i+1
617         do 320 j=j1,n
618            if (d(j).ge.p) go to 320
619            k=j
620            p=d(j)
621  320    continue
622         if (k.eq.i) go to 340
623         d(k)=d(i)
624         d(i)=p
625         do 330 j=1,n
626            p=x(j,i)
627            x(j,i)=x(j,k)
628  330    x(j,k)=p
629  340 continue
630      go to 360
631c
632c     special treatment of case n = 1
633c
634  350 d(1)=a(1,1)
635      x(1,1)=1.0d0
636  360 return
637c
638      end
639c
640      subroutine zeroit (a,n)
641      implicit real*8 (a-h,o-z)
642      dimension a(*)
643c     call vec_$dzero (a,n)
644      do  100 i=1,n
645        a(i)=0.0d0
646 100  continue
647      end
648      subroutine mult(tmat,con,num)
649      implicit real*8 (a-h,o-z)
650      dimension tmat(1)
651c
652c     this subroutine multiplies an array by a constant
653c     this is jmb invention; it is the same as dscal
654      call  dscal(num,con,tmat,1)
655c
656      end
657      subroutine mave(amat,vold,vnew,num)
658      implicit real*8 (a-h,o-z)
659      dimension amat(1),vold(1),vnew(1)
660c
661c     this subroutine multiplies a triangular matrix by a column vector
662c     and returns a new column vector.  vnew cannot be same place as vold
663c
664      n=num
665      do 13 i=1,n
666      ij=i*(i-1)/2
667      sum=0.0
668         do 20 j=1,n
669         ij=ij+1
670         if(j.gt.i) ij=ij+j-2
671         sum=sum+amat(ij)*vold(j)
672   20 continue
673      vnew(i)=sum
674   13 continue
675      return
676c
677      end
678      subroutine inner(x,y,sum,ne)
679c   WHO ON EARTH WROTE THIS - THIS DUPLICATES SCALAR, AND USES
680C   SINGLE PRECISION ZERO
681      implicit real*8 (a-h,o-z)
682      dimension x(1),y(1)
683c     multiplies the elements in two arrays with the same index and adds
684c     them similar to inner product
685c
686c     sum=0.0
687      sum=0.0d0
688      m=ne
689      do 30 l=1,m
690      sum=sum+x(l)*y(l)
691   30 continue
692      return
693c
694      end
695      subroutine tri(a,b,m)
696      implicit real*8 (a-h,o-z)
697      dimension a(1), b(1)
698c
699c     a symmetric quadratic matrix a is changed to triangular form b
700c
701      mm=m
702      ij=0
703      do 200 i=1,mm
704         iad=i*mm-mm
705         do 100 j=1,i
706            ij=ij+1
707            b(ij)=a(iad+j)
708  100 continue
709  200 continue
710      return
711c
712      end
713      subroutine quadbfta(a,b,c,m)
714      implicit real*8 (a-h,o-z)
715      dimension a(1),b(1)
716c
717c     make a quadratic symmetric or antisymmetric matrix b from
718c     triangular matrix a
719c
720      c1=c
721      con=abs(c1)
722      ij=0
723      do 10 i=1,m
724         iad=i*m-m
725         do 20 j=1,i
726            jad=j*m-m
727            ij=ij+1
728            b(iad+j)=con*a(ij)
729            b(jad+i)=c1*a(ij)
730   20 continue
731      b(iad+i)=b(iad+i)*(c1+con)/2
732   10 continue
733      return
734c
735      end
736      subroutine vecmat(amat,istcol,icol,vecin,vecout,idime)
737      implicit real*8 (a-h,o-z)
738      dimension amat(1), vecin(1), vecout(1)
739c
740c     multiplies amat(t) times a vector starting from stcol to stcol+ind
741c     looks like josep bofill creation
742      ist=istcol
743      ic=icol
744      id=idime
745      indcol=ist+ic-1
746      do 10 i=ist,indcol
747         nad=i*id-id
748         sum=0.0
749         do 20 j=1,id
750            sum=sum+amat(j+nad)*vecin(j)
751   20 continue
752         vecout(i)=sum
753   10 continue
754      return
755c
756      end
757      subroutine matmat(a,ias,ia,b,ibs,ib,d,id)
758      implicit real*8 (a-h,o-z)
759      dimension a(1),b(1),d(1)
760c
761c     multiplies a(t) x b with common dimension id, result in d
762c
763      ja=ia
764      jas=ias
765      jb=ib
766      jbs=ibs
767      jae=ja+jas-1
768      jbe=jb+jbs-1
769      idim=id
770      do 30 i=jas,jae
771         iad=i*idim-idim
772         do 20 j=jbs,jbe
773            jad=j*idim-idim
774            sum=0.0
775            do 10 k=1,idim
776               sum=sum+a(k+iad)*b(k+jad)
777   10 continue
778            d(jad+i)=sum
779   20 continue
780   30 continue
781      return
782c
783      end
784      subroutine eig (b,a,d,n)
785      implicit real*8 (a-h,o-z)
786      dimension a(n,n), b(1), d(1)
787      ij=0
788      do 10 i=1,n
789      do 10 j=1,i
790         ij=ij+1
791         a(i,j)=b(ij)
792   10 continue
793      call sdiag2 (n,n,a,d,a)
794      return
795c
796c
797      end
798      subroutine spur (a,b,ncf,s)
799      implicit real*8 (a-h,o-z)
800      dimension a(1), b(1)
801c
802c     ....    spur(ab) - both are synmmetrical triangular matrices
803c
804cccc  n=ncf
805      s=zero
806      ij=0
807      do 10 i=1,ncf
808      do 10 j=1,i
809         ij=ij+1
810         s=s+a(ij)*b(ij)
811         if (i.eq.j) s=s-a(ij)*b(ij)*0.5d0
812   10 continue
813      s=2*s
814      return
815c
816      end
817      subroutine txs_trans (a,ncf)
818      implicit real*8 (a-h,o-z)
819      dimension a(ncf,ncf)
820      do 10 i=1,ncf
821      do 10 j=1,i
822         s=a(i,j)
823         a(i,j)=a(j,i)
824         a(j,i)=s
825   10 continue
826      return
827c
828      end
829c======================================================================
830c linking routine between mamu and mxma which translates existing calls
831c of mamu directly into calls of mxma.
832c richard g.a. bone, march, 1991.
833c main interpretative problem is that mamu performs b*a matrix multiplication
834c whereas mxma performs a*b.
835c mamu works with either square or triangular matrices according to
836c arguments k,l,m, whereas mxma requires squares only.
837c n.b. in pulay group mxma may not take the same array address twice.
838c
839c     subroutine mamu (b,a,c,k,l,m,n,w)
840c     implicit double precision (a-h,o-z)
841c     dimension a(1),b(1),c(1),w(1)
842c     common /big/bl(1)
843c     common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,
844c    1 npl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),
845c    2 icode(200),inpf,ioutf
846c     nsq = n**2
847c     ntri = n*(n+1)/2
848c
849c     if((k.ne.0).and.(k.ne.1)) goto 9998
850c     if((l.ne.0).and.(l.ne.1)) goto 9998
851c     if((m.ne.0).and.(m.ne.1)) goto 9998
852c
853c     call getmem(nsq,ia)
854c     call getmem(nsq,ic)
855c
856c     if (l.eq.0) call squr(a,bl(ia),n)
857c     if (l.eq.1) call tfer(a,bl(ia),nsq)
858c
859c     if (k.eq.0) then
860c      call getmem(nsq,ib)
861c      call squr(b,bl(ib),n)
862c      call mxma(bl(ia),1,n,bl(ib),1,n,bl(ic),1,n,n,n,n)
863c      call retmem(1)
864c     endif
865c     if (k.eq.1) then
866c      ir=ir-1
867c      call mxma(bl(ia),1,n,b,1,n,bl(ic),1,n,n,n,n)
868c     endif
869c
870c     if (m.eq.0) call tri(bl(ic),c,n)
871c     if (m.eq.1) call tfer(bl(ic),c,nsq)
872c
873c     call retmem(ir)
874c
875c     call retmem(2)
876c     return
877c9998 write(icond,*)
878c    1  'incorrect use of mamu: integer arguments must be 1 or 0'
879c     return
880c     end
881c
882c======================================================================
883      subroutine squr(t,sq,n)
884      implicit real*8(a-h,o-z)
885c     triangle to square
886      dimension sq(1),t(1)
887      ij=0
888      ii=0
889      do 20 i=1,n
890      jj=0
891c$dir no_recurrence
892cdir$ ivdep
893      do 10 j=1,i
894      ij=ij+1
895      sq(ii+j)=t(ij)
896      sq(jj+i)=t(ij)
89710    jj=jj+n
89820    ii=ii+n
899      return
900      end
901      subroutine mxma(a,mcola,mrowa,b,mcolb,mrowb,
902     1                r,mcolr,mrowr,ncol,nlink,nrow)
903c     this code should run at about 34 mflops for matrices
904c     of dimensions between 50 and 500 on a 25 mhz rs6000
905      implicit real*8 (a-h,o-z)
906      dimension  a(*),b(*),r(*)
907      parameter (nb=60)
908c     written based on tips by  ron bell, ibm united kingdom, ltd.
909c     seeibm document no. gg24-3611
910c     note that this code, in spite of all efforts, does not reach
911c     the numerical performance (43 mflops/s) claimed in the above
912c     publication. it is assumed now that the performance figures
913c     of bell refer to the inner loop only and do not reflect
914c     the overhead.  the maximum performance of this code is about
915c     35 mflops/s
916      dimension rr(nb,nb),aa(nb,nb),bb(nb,nb)
917c     performs r=a*b; the actual dimensions are naxma,maxmb,naxmb
918c     the addressing function is: r(i,j)=r((i-1)*mcolr+(j-1)*mrowr+1)
919c     a(i,k)=a((i-1)*mcola+(k-1)*mrowa+1)
920c     b(k,j)=b((k-1)*mcolb+(j-1)*mrowb+1)
921c     good example of obscure addressing in fortran
922c     please do not write code like this. unfortunately, this piece of
923c     code is widely used on the crays
924c     the buffer size has been roughly optimized for the apollo dn10k
925      ir=1
926      do 3 j=1,nrow
927        irr=ir
928        do 2 i=1,ncol
929c        ncol is the number of rows. the dutch mind is perplexing
930          r(irr)=0.0d0
931          irr=irr+mcolr
932 2      continue
933        ir=ir+mrowr
934 3    continue
935      do 1400 ii=1,ncol,nb
936          ie=min0(ii+nb-1,ncol)
937          ie1=ie-ii+1
938          ie2=3*(ie1/3)
939c         write(*,*) ie,ie1
940          do 1300 jj=1,nrow,nb
941            je=min0(jj+nb-1,nrow)
942            je1=je-jj+1
943            je2=3*(je1/3)
944c      make sure that ie2 and je2 are divisible by 3 for the
945c      loop unrolling
946            ir=(ii-1)*mcolr+(jj-1)*mrowr+1
947            do 200 i=1,ie1
948              irr=ir
949              do 100 j=1,je1
950c               rr(i,j)=r(irr)
951                rr(i,j)=0.0d0
952                irr=irr+mrowr
953 100          continue
954              ir=ir+mcolr
955 200        continue
956            do 1000 kk=1,nlink,nb
957              ke=min0(kk+nb-1,nlink)
958              ke1=ke-kk+1
959              ia=(ii-1)*mcola+(kk-1)*mrowa+1
960              do 400 i=1,ie1
961                iaa=ia
962                do 300 k=1,ke1
963                  aa(k,i)=a(iaa)
964                  iaa=iaa+mrowa
965 300            continue
966                ia=ia+mcola
967 400          continue
968              ib=(kk-1)*mcolb+(jj-1)*mrowb+1
969              do 600 j=1,je1
970                ibb=ib
971                do 500 k=1,ke1
972                  bb(k,j)=b(ibb)
973                  ibb=ibb+mcolb
974 500            continue
975                ib=ib+mrowb
976 600          continue
977              do 900 i=1,ie2,3
978                do 800 j=1,je2,3
979                  s00=rr(i,j)
980                  s10=rr(i+1,j)
981                  s20=rr(i+2,j)
982                  s01=rr(i,j+1)
983                  s11=rr(i+1,j+1)
984                  s21=rr(i+2,j+1)
985                  s02=rr(i,j+2)
986                  s12=rr(i+1,j+2)
987                  s22=rr(i+2,j+2)
988                  do 700 k=1,ke1
989                    s00=s00+aa(k,i)*bb(k,j)
990                    s01=s01+aa(k,i)*bb(k,j+1)
991                    s02=s02+aa(k,i)*bb(k,j+2)
992                    s10=s10+aa(k,i+1)*bb(k,j)
993                    s11=s11+aa(k,i+1)*bb(k,j+1)
994                    s12=s12+aa(k,i+1)*bb(k,j+2)
995                    s20=s20+aa(k,i+2)*bb(k,j)
996                    s21=s21+aa(k,i+2)*bb(k,j+1)
997                    s22=s22+aa(k,i+2)*bb(k,j+2)
998 700              continue
999                  rr(i,j)=s00
1000                  rr(i+1,j)=s10
1001                  rr(i+2,j)=s20
1002                  rr(i,j+1)=s01
1003                  rr(i+1,j+1)=s11
1004                  rr(i+2,j+1)=s21
1005                  rr(i,j+2)=s02
1006                  rr(i+1,j+2)=s12
1007                  rr(i+2,j+2)=s22
1008 800            continue
1009                do 820 j=je2+1,je1
1010                  s00=rr(i,j)
1011                  s10=rr(i+1,j)
1012                  s20=rr(i+2,j)
1013                  do 810 k=1,ke1
1014                    s00=s00+aa(k,i)*bb(k,j)
1015                    s10=s10+aa(k,i+1)*bb(k,j)
1016                    s20=s20+aa(k,i+2)*bb(k,j)
1017 810              continue
1018                  rr(i,j)=s00
1019                  rr(i+1,j)=s10
1020                  rr(i+2,j)=s20
1021 820            continue
1022 900          continue
1023              do 950 i=ie2+1,ie1
1024               do 940 j=1,je1
1025                do 930 k=1,ke1
1026                 rr(i,j)=rr(i,j)+aa(k,i)*bb(k,j)
1027 930            continue
1028 940           continue
1029 950          continue
1030 1000       continue
1031            ir=(ii-1)*mcolr+(jj-1)*mrowr+1
1032            do 1200 i=1,ie1
1033              irr=ir
1034              do 1100 j=1,je1
1035                r(irr)=r(irr)+rr(i,j)
1036                irr=irr+mrowr
1037 1100         continue
1038              ir=ir+mcolr
1039 1200       continue
1040 1300     continue
1041 1400 continue
1042      end
1043      subroutine nerror(noer,routine,message,n1,n2)
1044#include "errquit.fh"
1045      character*(*) routine
1046      character*(*) message
1047      common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,n
1048     1pl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),icode(20
1049     20),inpf,ioutf
1050      write(iout,*) 'Error no. ',noer,' in ',routine,' ',message,n1,n2
1051      write(icond,*) 'Error no. ',noer,' in ',routine,' ',message,n1,n2
1052c100  format(' Error No.',i4,' in ',a20,2x,/,a80,/,' variables ',2i6)
1053      call errquit('texas: nerror called', 0, INT_ERR)
1054*      stop 20
1055      end
1056      subroutine txs_message(routine,messag1,n1,n2)
1057      character*(*) routine
1058      character*(*) messag1
1059      common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,n
1060     1pl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),icode(20
1061     20),inpf,ioutf
1062      write(iout,*) 'Message from ',routine,' ',messag1,n1,n2
1063      write(icond,*) 'Message from ',routine,' ',messag1,n1,n2
1064      end
1065      subroutine txs_error
1066#include "errquit.fh"
1067      common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,n
1068     1pl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),icode(20
1069     20),inpf,ioutf
1070      write(iout,*) ' error found'
1071      write(icond,*) ' error found'
1072*      stop 10
1073      call errquit('texas: txs_error called', 0, INT_ERR)
1074      end
1075c==================================================================
1076      subroutine dxypz(n,dx,incx, dy,incy, dz,incz)
1077c
1078c vector_x*vector_y + vector_z ===> vector_z
1079c
1080c modification of the daxpy blas routine. (KW)
1081c
1082c
1083      double precision dx(*),dy(*),dz(*)
1084      integer i,incx,incy,incz, ix,iy,iz, m,mp1,n
1085c
1086      if(n.le.0)return
1087      if(incx.eq.1.and.incy.eq.1.and.incz.eq.1)go to 20
1088c
1089c        code for unequal increments or equal increments
1090c          not equal to 1
1091c
1092      ix = 1
1093      iy = 1
1094      iz = 1
1095      if(incx.lt.0) ix= (-n+1)*incx + 1
1096      if(incy.lt.0) iy= (-n+1)*incy + 1
1097      if(incz.lt.0) iz =(-n+1)*incz + 1
1098c
1099      do 10 i = 1,n
1100        dz(iz) = dz(iz) + dy(iy)*dx(ix)
1101        ix = ix + incx
1102        iy = iy + incy
1103        iz = iz + incz
1104   10 continue
1105c
1106      return
1107c
1108c        code for both increments equal to 1
1109c
1110c
1111c        clean-up loop
1112c
1113   20 m = mod(n,4)
1114      if( m .eq. 0 ) go to 40
1115      do 30 i = 1,m
1116        dz(i) = dz(i) + dy(i)*dx(i)
1117   30 continue
1118      if( n .lt. 4 ) return
1119   40 mp1 = m + 1
1120      do 50 i0= mp1,n,4
1121        i1=i0+1
1122        i2=i1+1
1123        i3=i2+1
1124        dz(i0) = dz(i0) + dy(i0)*dx(i0)
1125        dz(i1) = dz(i1) + dy(i1)*dx(i1)
1126        dz(i2) = dz(i2) + dy(i2)*dx(i2)
1127        dz(i3) = dz(i3) + dy(i3)*dx(i3)
1128   50 continue
1129      return
1130      end
1131c==================================================================
1132      subroutine dxyz(n,dx,incx, dy,incy, dz,incz)
1133c
1134c vector_x*vector_y  ===> vector_z
1135c
1136c modification of the daxpy blas routine. (KW)
1137c
1138c
1139      double precision dx(*),dy(*),dz(*)
1140      integer i,incx,incy,incz, ix,iy,iz, m,mp1,n
1141c
1142      if(n.le.0)return
1143      if(incx.eq.1.and.incy.eq.1.and.incz.eq.1)go to 20
1144c
1145c        code for unequal increments or equal increments
1146c          not equal to 1
1147c
1148      ix = 1
1149      iy = 1
1150      iz = 1
1151      if(incx.lt.0) ix= (-n+1)*incx + 1
1152      if(incy.lt.0) iy= (-n+1)*incy + 1
1153      if(incz.lt.0) iz =(-n+1)*incz + 1
1154c
1155      do 10 i = 1,n
1156        dz(iz) = dy(iy)*dx(ix)
1157        ix = ix + incx
1158        iy = iy + incy
1159        iz = iz + incz
1160   10 continue
1161c
1162      return
1163c
1164c        code for both increments equal to 1
1165c
1166c
1167c        clean-up loop
1168c
1169   20 m = mod(n,4)
1170      if( m .eq. 0 ) go to 40
1171      do 30 i = 1,m
1172        dz(i) = dy(i)*dx(i)
1173   30 continue
1174      if( n .lt. 4 ) return
1175   40 mp1 = m + 1
1176      do 50 i0= mp1,n,4
1177        i1=i0+1
1178        i2=i1+1
1179        i3=i2+1
1180        dz(i0) =  dy(i0)*dx(i0)
1181        dz(i1) =  dy(i1)*dx(i1)
1182        dz(i2) =  dy(i2)*dx(i2)
1183        dz(i3) =  dy(i3)*dx(i3)
1184   50 continue
1185      return
1186      end
1187c==================================================================
1188      subroutine dxpy(n,dx,incx, dy,incy)
1189c
1190c vector_x+vector_y  ===> vector_y
1191c
1192c modification of the daxpy blas routine. (KW)
1193c
1194c
1195      double precision dx(*),dy(*)
1196      integer i,incx,incy, ix,iy, m,mp1,n
1197c
1198      if(n.le.0)return
1199      if(incx.eq.1.and.incy.eq.1)go to 20
1200c
1201c        code for unequal increments or equal increments
1202c          not equal to 1
1203c
1204      ix = 1
1205      iy = 1
1206      if(incx.lt.0) ix= (-n+1)*incx + 1
1207      if(incy.lt.0) iy= (-n+1)*incy + 1
1208c
1209      do 10 i = 1,n
1210        dy(iy) = dy(iy) + dx(ix)
1211        ix = ix + incx
1212        iy = iy + incy
1213   10 continue
1214c
1215      return
1216c
1217c        code for both increments equal to 1
1218c
1219c
1220c        clean-up loop
1221c
1222   20 m = mod(n,4)
1223      if( m .eq. 0 ) go to 40
1224      do 30 i = 1,m
1225        dy(i) = dy(i) + dx(i)
1226   30 continue
1227      if( n .lt. 4 ) return
1228   40 mp1 = m + 1
1229      do 50 i0= mp1,n,4
1230        i1=i0+1
1231        i2=i1+1
1232        i3=i2+1
1233        dy(i0) =  dy(i0) + dx(i0)
1234        dy(i1) =  dy(i1) + dx(i1)
1235        dy(i2) =  dy(i2) + dx(i2)
1236        dy(i3) =  dy(i3) + dx(i3)
1237   50 continue
1238      return
1239      end
1240c==================================================================
1241      subroutine get_ij_half(ij, i, j)
1242c
1243c extracts indeces i & j from common index ij=i*(i-1)/2 + j
1244c
1245      implicit none
1246      integer ij, i, j
1247      intrinsic sqrt, float
1248c
1249      i = sqrt(float(ij + ij))
1250      j = ij - i*(i-1)/2
1251      if (i .lt. j) then
1252         i = i + 1
1253         j = ij - i*(i-1)/2
1254      endif
1255c
1256      end
1257c-----------------------------------------------------------------
1258      subroutine get_ij_full(ij,nj, i, j)
1259c
1260c     extracts indeces i and j from common index ij=(i-1)*nj + j
1261c
1262      implicit none
1263      integer ij,nj, i, j
1264c
1265      i=ij/nj+1
1266      j=ij-(i-1)*nj
1267c
1268      if(j.eq.0) then
1269         i=i-1
1270         j=ij-(i-1)*nj
1271      endif
1272c
1273      end
1274c-----------------------------------------------------------------
1275