1c $Id$ 2c 3c this module contains the home-made service routines 4c like matrix manipulations etc. 5c some of these duplicate blas routines and should be replaced 6c by them later 7c.... now come the homebrewn 8c.. replaced by blas routine (same calling sequence) 9c subroutine add1 (a,con,b,nn) 10c implicit real*8 (a-h,o-z) 11c dimension a(1), b(1) 12c n=nn 13c do 10 i=1,n 14c 10 b(i)=b(i)+con*a(i) 15c return 16c 17c end 18c subroutine tfer (a,b,n) 19c implicit real*8 (a-h,o-z) 20c dimension a(1), b(1) 21c 22c transfer a to b 23c 24c nn=n 25c do 10 i=1,nn 26c 10 b(i)=a(i) 27c return 28c 29c end 30 subroutine txs_add(a,b,n) 31 implicit double precision (a-h,o-z) 32 dimension a(*),b(*) 33c... adds a to b, result in b 34c%include '/sys/ins/base.ins.ftn' 35c%include '/sys/ins/vec.ins.ftn' 36c call vec_$dadd_vector(a,b,n,b) 37 do 100 i=1,n 38 b(i)=b(i)+a(i) 39 100 continue 40 end 41 subroutine txs_add1(a,con,b,n) 42 implicit double precision (a-h,o-z) 43 dimension a(*),b(*) 44 call daxpy(n,con,a,1,b,1) 45 end 46c 47 subroutine tfer (a,b,n) 48 implicit double precision (a-h,o-z) 49 dimension a(*),b(*) 50c 51* if (n.lt.1000) then 52 do i = 1, n 53 b(i) = a(i) 54 enddo 55* else 56* call dcopy(n,a,1,b,1) 57* endif 58c 59 end 60 subroutine mxmtr(a,b,c,m,n,p) 61c... this is not a true blas routine- it is our own creation 62c... this subroutine performs the matrix multiply c=a(trspse)*b 63c both the storage dimension and the real dimension of the 64c matrices is given by a(p,m),b(p,n),c(m,n) 65c it uses an efficient blocking algorithm 66c note that ingeneral it is more efficient to perform a(tr)b 67c than a*b. transpose the matrix before if you have to 68 implicit real*8 (a-h,o-z) 69 integer p 70 dimension a(p,m), b(p,n),c(m,n) 71 parameter (ibl=16) 72c performs a(tr)*b=c 73 do 500 i=1,m,ibl 74 iend=min0(i+ibl-1,m) 75 do 500 j=1,n,ibl 76 jend=min0(j+ibl-1,n) 77 do 500 i1=i,iend 78 do 500 j1=j,jend 79 sum=0.0d0 80c sum=vec_$ddot(a(1,i1),b(1,j1),p) 81 sum=ddot(p,a(1,i1),1,b(1,j1),1) 82c call scalar(a(1,i1),b(1,j1),p,sum) 83c do 400 k=1,p 84c sum=sum+a(k,i1)*b(k,j1) 85c400 continue 86 c(i1,j1)=sum 87 500 continue 88 end 89c 90c subroutine sdiag2v(m,n,a,d,x) 91 subroutine sdiag2 (m,n,a,d,x) 92 implicit real*8 (a-h,o-z) 93 integer*4 f4,h4,l4 94ckwol parameter (mxdim=401) 95ckwol parameter (mxdim=601) 96 parameter (mxdim=2000) 97c 98c 99c computation of all eigenvalues and eigenvectors of a real 100c symmetric matrix by the method of qr transformations. 101c if the euclidean norm of the rows varies s t r o n g l y 102c most accurate results may be obtained by permuting rows and 103c columns to give an arrangement with increasing norms of rows. 104c 105c two machine constants must be adjusted appropriately, 106c eps = minimum of all x such that 1+x is greater than 1 on the 107c e computer, 108c tol = inf / eps with inf = minimum of all positive x represen- 109c table within the computer. 110c a dimension statement e(256) may also be changed appropriately. 111c 112c input 113c 114c (m) not larger than mxdim, corresponding value of the actual 115c dimension statement a(m,m), d(m), x(m,m), 116c (n) not larger than (m), order of the matrix, 117c (a) the matrix to be diagonalized, its lower triangle has to 118c be given as ((a(i,j), j=1,i), i=1,n), 119c 120c output 121c 122c (d) components d(1), ..., d(n) hold the computed eigenvalues 123c in ascending sequence. the remaining components of (d) are 124c unchanged, 125c (x) the computed eigenvector corresponding to the j-th eigen- 126c value is stored as column (x(i,j), i=1,n). the eigenvectors 127c are normalized and orthogonal to working accuracy. the 128c remaining entries of (x) are unchanged. 129c 130c array (a) is unaltered. however, the actual parameters 131c corresponding to (a) and (x) may be identical, ''overwriting'' 132c the eigenvectors on (a). 133c 134c leibniz-rechenzentrum, munich 1965 135c 136c 137 dimension a(m,m), d(m), x(m,m) 138 dimension e(mxdim) 139c 140c correct adjustment for ieee 64 bit floating point numbers` 141c is about (not too sensitive) 142 eps=1.0d-14 143 tol=1.0d-200 144c 145 if (n.eq.1) go to 350 146 if (n.gt.mxdim) then 147 write(*,*) ' dimension too big, increase mxdim in sdiag2' 148 write (*,*) ' max= ',mxdim, ' actual= ',n 149 call txs_error 150c can be replaced by go to 360 if you do not have "error" 151 end if 152 do 10 i=1,n 153 do 10 j=1,i 154 10 x(i,j)=a(i,j) 155c 156c householder reduction 157c simulation of loop do 150 i=n,2,(-1) 158c 159 do 150 ni=2,n 160 ii=n+2-ni 161c 162c fake loop for recursive address calculation 163c 164 do 150 i=ii,ii 165 l=i-2 166 h=0.0 167 g=x(i,i-1) 168#if 1 169 l4=l 170 if (l4) 140,140,20 171#else 172 if (l) 140,140,20 173#endif 174 20 do 30 k=1,l 175 30 h=h+x(i,k)**2 176 s=h+g*g 177 if (s.ge.tol) go to 40 178 h=0.0 179 go to 140 180#if 1 181 40 h4=h 182 if (h4) 140,140,50 183#else 184 40 if (h) 140,140,50 185#endif 186 50 l=l+1 187 f=g 188 g=dsqrt(s) 189#if 1 190 f4=f 191 if (f4) 70,70,60 192#else 193 if (f) 70,70,60 194#endif 195 60 g=-g 196 70 h=s-f*g 197 x(i,i-1)=f-g 198 f=0.0 199c 200 do 110 j=1,l 201 x(j,i)=x(i,j)/h 202 s=0.0 203c do 80 k=1,j 204c 80 s=s+x(j,k)*x(i,k) 205c s=vec_$ddot_i(x(j,1),m,x(i,1),m,j) 206 s=ddot(j,x(j,1),m,x(i,1),m) 207 j1=j+1 208 if (j1.gt.l) go to 100 209c do 90 k=j1,l 210c 90 s=s+x(k,j)*x(i,k) 211 j1l=l-j1+1 212c write(*,*) s,j,j1,i,m,j1l 213c s=s+vec_$ddot_i(x(j1,j),1,x(i,j1),m,j1l) 214 s=s+ddot(j1l,x(j1,j),1,x(i,j1),m) 215 100 e(j)=s/h 216 110 f=f+s*x(j,i) 217c 218 f=f/(h+h) 219c 220c do 120 j=1,l 221c 120 e(j)=e(j)-f*x(i,j) 222c call vec_$dmult_add_i(e,1,x(i,1),m,l,-f,e,1) 223 call daxpy(l,-f,x(i,1),m,e,1) 224c 225 do 130 j=1,l 226 f=x(i,j) 227 s=e(j) 228c do 130 k=1,j 229c 130 x(j,k)=x(j,k)-f*e(k)-x(i,k)*s 230c call vec_$dmult_add_i(x(j,1),m,e,1,j,-f,x(j,1),m) 231c call vec_$dmult_add_i(x(j,1),m,x(i,1),m,j,-s,x(j,1),m) 232 call daxpy(j,-f,e,1,x(j,1),m) 233 call daxpy(j,-s,x(i,1),m,x(j,1),m) 234 130 continue 235c 236 140 d(i)=h 237 150 e(i-1)=g 238c 239c accumulation of transformation matrices 240c 241 d(1)=x(1,1) 242 x(1,1)=1.0 243 do 200 i=2,n 244 l=i-1 245 if (d(i)) 190,190,160 246 160 do 180 j=1,l 247 s=0.0 248c do 170 k=1,l 249c 170 s=s+x(i,k)*x(k,j) 250c s=vec_$ddot_i(x(i,1),m,x(1,j),1,l) 251 s=ddot(l,x(i,1),m,x(1,j),1) 252c do 180 k=1,l 253c 180 x(k,j)=x(k,j)-s*x(k,i) 254c call vec_$dmult_add(x(1,j),x(1,i),l,-s,x(1,j)) 255 call daxpy(l,-s,x(1,i),1,x(1,j),1) 256 180 continue 257 190 d(i)=x(i,i) 258 x(i,i)=1.0 259 do 200 j=1,l 260 x(i,j)=0.0 261 200 x(j,i)=0.0 262c call vec_$dzero_i(x(i,1),m,l) 263c call vec_$dzero(x(1,i),l) 264c 200 continue 265c 266c diagonalization of the tridiagonal matrix 267c 268 b=0.0 269 f=0.0 270 e(n)=0.0 271c 272 do 310 l=1,n 273 h=eps*(dabs(d(l))+dabs(e(l))) 274 if (h.gt.b) b=h 275c 276c test for splitting 277c 278 do 210 j=l,n 279 if (dabs(e(j)).le.b) go to 220 280 210 continue 281c 282c test for convergence 283c 284 220 if (j.eq.l) go to 310 285c 286c shift from upper 2*2 minor 287c 288 230 p=(d(l+1)-d(l))*0.5/e(l) 289 r=dsqrt(p*p+1.0) 290 if (p) 240,250,250 291 240 p=p-r 292 go to 260 293 250 p=p+r 294 260 h=d(l)-e(l)/p 295 do 270 i=l,n 296 270 d(i)=d(i)-h 297 f=f+h 298c 299c qr transformation 300c 301 p=d(j) 302 c=1.0 303 s=0.0 304c 305c simulation of loop do 330 i=j-1,l,(-1) 306c 307 j1=j-1 308 do 300 ni=l,j1 309 ii=l+j1-ni 310c 311c fake loop for recursive address calculation 312c 313 do 300 i=ii,ii 314 g=c*e(i) 315 h=c*p 316c 317c protection against underflow of exponents 318c 319 if (dabs(p).lt.dabs(e(i))) go to 280 320 c=e(i)/p 321 r=dsqrt(c*c+1.0) 322 e(i+1)=s*p*r 323 s=c/r 324 c=1.0/r 325 go to 290 326 280 c=p/e(i) 327 r=dsqrt(c*c+1.0) 328 e(i+1)=s*e(i)*r 329 s=1.0/r 330 c=c/r 331 290 p=c*d(i)-s*g 332 d(i+1)=h+s*(c*g+s*d(i)) 333c do 300 k=1,n 334c h=x(k,i+1) 335c x(k,i+1)=x(k,i)*s+h*c 336c 300 x(k,i)=x(k,i)*c-h*s 337c call vec_$dcopy(x(1,i),tmp1,n) 338c call vec_$dcopy(x(1,i+1),tmp2,n) 339c call vec_$dmult_constant(tmp2,n,c,x(1,i+1)) 340c call vec_$dmult_add(x(1,i+1),x(1,i),n,s,x(1,i+1)) 341c call vec_$dmult_constant(tmp2,n,-s,x(1,i)) 342c call vec_$dmult_add(x(1,i),tmp1,n,c,x(1,i)) 343 call drot(n,x(1,i+1),1,x(1,i),1,c,s) 344 300 continue 345c 346 e(l)=s*p 347 d(l)=c*p 348 if (abs(e(l)).gt.b) go to 230 349c 350c convergence 351c 352 310 d(l)=d(l)+f 353c 354c ordering of eigenvalues 355c 356 ni=n-1 357 do 340 i=1,ni 358 k=i 359 p=d(i) 360 j1=i+1 361 do 320 j=j1,n 362 if (d(j).ge.p) go to 320 363 k=j 364 p=d(j) 365 320 continue 366 if (k.eq.i) go to 340 367 d(k)=d(i) 368 d(i)=p 369 do 330 j=1,n 370 p=x(j,i) 371 x(j,i)=x(j,k) 372 330 x(j,k)=p 373 340 continue 374 go to 360 375c 376c special treatment of case n = 1 377c 378 350 d(1)=a(1,1) 379 x(1,1)=1.0 380 360 return 381c 382 end 383c 384c subroutine sdiag2 (m,n,a,d,x) 385 subroutine sdiag2sc (m,n,a,d,x) 386 implicit real*8 (a-h,o-z) 387c 388c 389c computation of all eigenvalues and eigenvectors of a real 390c symmetric matrix by the method of qr transformations. 391c if the euclidean norm of the rows varies s t r o n g l y 392c most accurate results may be obtained by permuting rows and 393c columns to give an arrangement with increasing norms of rows. 394c 395c two machine constants must be adjusted appropriately, 396c eps = minimum of all x such that 1+x is greater than 1 on the 397c e computer, 398c tol = inf / eps with inf = minimum of all positive x represen- 399c table within the computer. 400c a dimension statement e(160) may also be changed appropriately. 401c 402c input 403c 404c (m) not larger than 160, corresponding value of the actual 405c dimension statement a(m,m), d(m), x(m,m), 406c (n) not larger than (m), order of the matrix, 407c (a) the matrix to be diagonalized, its lower triangle has to 408c be given as ((a(i,j), j=1,i), i=1,n), 409c 410c output 411c 412c (d) components d(1), ..., d(n) hold the computed eigenvalues 413c in ascending sequence. the remaining components of (d) are 414c unchanged, 415c (x) the computed eigenvector corresponding to the j-th eigen- 416c value is stored as column (x(i,j), i=1,n). the eigenvectors 417c are normalized and orthogonal to working accuracy. the 418c remaining entries of (x) are unchanged. 419c 420c array (a) is unaltered. however, the actual parameters 421c corresponding to (a) and (x) may be identical, ''overwriting'' 422c the eigenvectors on (a). 423c 424c leibniz-rechenzentrum, munich 1965 425c 426c 427 dimension a(m,m), d(m), x(m,m) 428 dimension e(160) 429 integer*4 f4,h4,l4 430c 431c correct adjustment for ieee floating point numbers (64 bits) 432c 433 eps=2.0d-15 434 tol=1.0d-292 435c 436 if (n.eq.1) go to 350 437 do 10 i=1,n 438 do 10 j=1,i 439 10 x(i,j)=a(i,j) 440c 441c householder reduction 442c simulation of loop do 150 i=n,2,(-1) 443c 444 do 150 ni=2,n 445 ii=n+2-ni 446c 447c fake loop for recursive address calculation 448c 449 do 150 i=ii,ii 450 l=i-2 451 h=0.0d0 452 g=x(i,i-1) 453#if 1 454 l4=l 455 if (l4) 140,140,20 456#else 457 if (l) 140,140,20 458#endif 459 20 do 30 k=1,l 460 30 h=h+x(i,k)**2 461 s=h+g*g 462 if (s.ge.tol) go to 40 463 h=0.0d0 464 go to 140 465#if 1 466 40 h4=h 467 if (h4) 140,140,50 468#else 469 40 if (h) 140,140,50 470#endif 471 50 l=l+1 472 f=g 473 g=dsqrt(s) 474#if 1 475 f4=f 476 if (f4) 70,70,60 477#else 478 if (f) 70,70,60 479#endif 480 60 g=-g 481 70 h=s-f*g 482 x(i,i-1)=f-g 483 f=0.0d0 484c 485 do 110 j=1,l 486 x(j,i)=x(i,j)/h 487 s=0.0d0 488 do 80 k=1,j 489 80 s=s+x(j,k)*x(i,k) 490 j1=j+1 491 if (j1.gt.l) go to 100 492 do 90 k=j1,l 493 90 s=s+x(k,j)*x(i,k) 494 100 e(j)=s/h 495 110 f=f+s*x(j,i) 496c 497 f=f/(h+h) 498c 499 do 120 j=1,l 500 120 e(j)=e(j)-f*x(i,j) 501c 502 do 130 j=1,l 503 f=x(i,j) 504 s=e(j) 505 do 130 k=1,j 506 130 x(j,k)=x(j,k)-f*e(k)-x(i,k)*s 507c 508 140 d(i)=h 509 150 e(i-1)=g 510c 511c accumulation of transformation matrices 512c 513 d(1)=x(1,1) 514 x(1,1)=1.0d0 515 do 200 i=2,n 516 l=i-1 517 if (d(i)) 190,190,160 518 160 do 180 j=1,l 519 s=0.0d0 520 do 170 k=1,l 521 170 s=s+x(i,k)*x(k,j) 522 do 180 k=1,l 523 180 x(k,j)=x(k,j)-s*x(k,i) 524 190 d(i)=x(i,i) 525 x(i,i)=1.0d0 526 do 200 j=1,l 527 x(i,j)=0.0d0 528 200 x(j,i)=0.0d0 529c 530c diagonalization of the tridiagonal matrix 531c 532 b=0.0d0 533 f=0.0d0 534 e(n)=0.0d0 535c 536 do 310 l=1,n 537 h=eps*(abs(d(l))+abs(e(l))) 538 if (h.gt.b) b=h 539c 540c test for splitting 541c 542 do 210 j=l,n 543 if (abs(e(j)).le.b) go to 220 544 210 continue 545c 546c test for convergence 547c 548 220 if (j.eq.l) go to 310 549c 550c shift from upper 2*2 minor 551c 552 230 p=(d(l+1)-d(l))*0.5d0/e(l) 553 r=dsqrt(p*p+1.0d0) 554 if (p) 240,250,250 555 240 p=p-r 556 go to 260 557 250 p=p+r 558 260 h=d(l)-e(l)/p 559 do 270 i=l,n 560 270 d(i)=d(i)-h 561 f=f+h 562c 563c qr transformation 564c 565 p=d(j) 566 c=1.0d0 567 s=0.0d0 568c 569c simulation of loop do 330 i=j-1,l,(-1) 570c 571 j1=j-1 572 do 300 ni=l,j1 573 ii=l+j1-ni 574c 575c fake loop for recursive address calculation 576c 577 do 300 i=ii,ii 578 g=c*e(i) 579 h=c*p 580c 581c protection against underflow of exponents 582c 583 if (abs(p).lt.abs(e(i))) go to 280 584 c=e(i)/p 585 r=dsqrt(c*c+1.0d0) 586 e(i+1)=s*p*r 587 s=c/r 588 c=1.0d0/r 589 go to 290 590 280 c=p/e(i) 591 r=dsqrt(c*c+1.0d0) 592 e(i+1)=s*e(i)*r 593 s=1.0d0/r 594 c=c/r 595 290 p=c*d(i)-s*g 596 d(i+1)=h+s*(c*g+s*d(i)) 597 do 300 k=1,n 598 h=x(k,i+1) 599 x(k,i+1)=x(k,i)*s+h*c 600 300 x(k,i)=x(k,i)*c-h*s 601c 602 e(l)=s*p 603 d(l)=c*p 604 if (abs(e(l)).gt.b) go to 230 605c 606c convergence 607c 608 310 d(l)=d(l)+f 609c 610c ordering of eigenvalues 611c 612 ni=n-1 613 do 340 i=1,ni 614 k=i 615 p=d(i) 616 j1=i+1 617 do 320 j=j1,n 618 if (d(j).ge.p) go to 320 619 k=j 620 p=d(j) 621 320 continue 622 if (k.eq.i) go to 340 623 d(k)=d(i) 624 d(i)=p 625 do 330 j=1,n 626 p=x(j,i) 627 x(j,i)=x(j,k) 628 330 x(j,k)=p 629 340 continue 630 go to 360 631c 632c special treatment of case n = 1 633c 634 350 d(1)=a(1,1) 635 x(1,1)=1.0d0 636 360 return 637c 638 end 639c 640 subroutine zeroit (a,n) 641 implicit real*8 (a-h,o-z) 642 dimension a(*) 643c call vec_$dzero (a,n) 644 do 100 i=1,n 645 a(i)=0.0d0 646 100 continue 647 end 648 subroutine mult(tmat,con,num) 649 implicit real*8 (a-h,o-z) 650 dimension tmat(1) 651c 652c this subroutine multiplies an array by a constant 653c this is jmb invention; it is the same as dscal 654 call dscal(num,con,tmat,1) 655c 656 end 657 subroutine mave(amat,vold,vnew,num) 658 implicit real*8 (a-h,o-z) 659 dimension amat(1),vold(1),vnew(1) 660c 661c this subroutine multiplies a triangular matrix by a column vector 662c and returns a new column vector. vnew cannot be same place as vold 663c 664 n=num 665 do 13 i=1,n 666 ij=i*(i-1)/2 667 sum=0.0 668 do 20 j=1,n 669 ij=ij+1 670 if(j.gt.i) ij=ij+j-2 671 sum=sum+amat(ij)*vold(j) 672 20 continue 673 vnew(i)=sum 674 13 continue 675 return 676c 677 end 678 subroutine inner(x,y,sum,ne) 679c WHO ON EARTH WROTE THIS - THIS DUPLICATES SCALAR, AND USES 680C SINGLE PRECISION ZERO 681 implicit real*8 (a-h,o-z) 682 dimension x(1),y(1) 683c multiplies the elements in two arrays with the same index and adds 684c them similar to inner product 685c 686c sum=0.0 687 sum=0.0d0 688 m=ne 689 do 30 l=1,m 690 sum=sum+x(l)*y(l) 691 30 continue 692 return 693c 694 end 695 subroutine tri(a,b,m) 696 implicit real*8 (a-h,o-z) 697 dimension a(1), b(1) 698c 699c a symmetric quadratic matrix a is changed to triangular form b 700c 701 mm=m 702 ij=0 703 do 200 i=1,mm 704 iad=i*mm-mm 705 do 100 j=1,i 706 ij=ij+1 707 b(ij)=a(iad+j) 708 100 continue 709 200 continue 710 return 711c 712 end 713 subroutine quadbfta(a,b,c,m) 714 implicit real*8 (a-h,o-z) 715 dimension a(1),b(1) 716c 717c make a quadratic symmetric or antisymmetric matrix b from 718c triangular matrix a 719c 720 c1=c 721 con=abs(c1) 722 ij=0 723 do 10 i=1,m 724 iad=i*m-m 725 do 20 j=1,i 726 jad=j*m-m 727 ij=ij+1 728 b(iad+j)=con*a(ij) 729 b(jad+i)=c1*a(ij) 730 20 continue 731 b(iad+i)=b(iad+i)*(c1+con)/2 732 10 continue 733 return 734c 735 end 736 subroutine vecmat(amat,istcol,icol,vecin,vecout,idime) 737 implicit real*8 (a-h,o-z) 738 dimension amat(1), vecin(1), vecout(1) 739c 740c multiplies amat(t) times a vector starting from stcol to stcol+ind 741c looks like josep bofill creation 742 ist=istcol 743 ic=icol 744 id=idime 745 indcol=ist+ic-1 746 do 10 i=ist,indcol 747 nad=i*id-id 748 sum=0.0 749 do 20 j=1,id 750 sum=sum+amat(j+nad)*vecin(j) 751 20 continue 752 vecout(i)=sum 753 10 continue 754 return 755c 756 end 757 subroutine matmat(a,ias,ia,b,ibs,ib,d,id) 758 implicit real*8 (a-h,o-z) 759 dimension a(1),b(1),d(1) 760c 761c multiplies a(t) x b with common dimension id, result in d 762c 763 ja=ia 764 jas=ias 765 jb=ib 766 jbs=ibs 767 jae=ja+jas-1 768 jbe=jb+jbs-1 769 idim=id 770 do 30 i=jas,jae 771 iad=i*idim-idim 772 do 20 j=jbs,jbe 773 jad=j*idim-idim 774 sum=0.0 775 do 10 k=1,idim 776 sum=sum+a(k+iad)*b(k+jad) 777 10 continue 778 d(jad+i)=sum 779 20 continue 780 30 continue 781 return 782c 783 end 784 subroutine eig (b,a,d,n) 785 implicit real*8 (a-h,o-z) 786 dimension a(n,n), b(1), d(1) 787 ij=0 788 do 10 i=1,n 789 do 10 j=1,i 790 ij=ij+1 791 a(i,j)=b(ij) 792 10 continue 793 call sdiag2 (n,n,a,d,a) 794 return 795c 796c 797 end 798 subroutine spur (a,b,ncf,s) 799 implicit real*8 (a-h,o-z) 800 dimension a(1), b(1) 801c 802c .... spur(ab) - both are synmmetrical triangular matrices 803c 804cccc n=ncf 805 s=zero 806 ij=0 807 do 10 i=1,ncf 808 do 10 j=1,i 809 ij=ij+1 810 s=s+a(ij)*b(ij) 811 if (i.eq.j) s=s-a(ij)*b(ij)*0.5d0 812 10 continue 813 s=2*s 814 return 815c 816 end 817 subroutine txs_trans (a,ncf) 818 implicit real*8 (a-h,o-z) 819 dimension a(ncf,ncf) 820 do 10 i=1,ncf 821 do 10 j=1,i 822 s=a(i,j) 823 a(i,j)=a(j,i) 824 a(j,i)=s 825 10 continue 826 return 827c 828 end 829c====================================================================== 830c linking routine between mamu and mxma which translates existing calls 831c of mamu directly into calls of mxma. 832c richard g.a. bone, march, 1991. 833c main interpretative problem is that mamu performs b*a matrix multiplication 834c whereas mxma performs a*b. 835c mamu works with either square or triangular matrices according to 836c arguments k,l,m, whereas mxma requires squares only. 837c n.b. in pulay group mxma may not take the same array address twice. 838c 839c subroutine mamu (b,a,c,k,l,m,n,w) 840c implicit double precision (a-h,o-z) 841c dimension a(1),b(1),c(1),w(1) 842c common /big/bl(1) 843c common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap, 844c 1 npl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200), 845c 2 icode(200),inpf,ioutf 846c nsq = n**2 847c ntri = n*(n+1)/2 848c 849c if((k.ne.0).and.(k.ne.1)) goto 9998 850c if((l.ne.0).and.(l.ne.1)) goto 9998 851c if((m.ne.0).and.(m.ne.1)) goto 9998 852c 853c call getmem(nsq,ia) 854c call getmem(nsq,ic) 855c 856c if (l.eq.0) call squr(a,bl(ia),n) 857c if (l.eq.1) call tfer(a,bl(ia),nsq) 858c 859c if (k.eq.0) then 860c call getmem(nsq,ib) 861c call squr(b,bl(ib),n) 862c call mxma(bl(ia),1,n,bl(ib),1,n,bl(ic),1,n,n,n,n) 863c call retmem(1) 864c endif 865c if (k.eq.1) then 866c ir=ir-1 867c call mxma(bl(ia),1,n,b,1,n,bl(ic),1,n,n,n,n) 868c endif 869c 870c if (m.eq.0) call tri(bl(ic),c,n) 871c if (m.eq.1) call tfer(bl(ic),c,nsq) 872c 873c call retmem(ir) 874c 875c call retmem(2) 876c return 877c9998 write(icond,*) 878c 1 'incorrect use of mamu: integer arguments must be 1 or 0' 879c return 880c end 881c 882c====================================================================== 883 subroutine squr(t,sq,n) 884 implicit real*8(a-h,o-z) 885c triangle to square 886 dimension sq(1),t(1) 887 ij=0 888 ii=0 889 do 20 i=1,n 890 jj=0 891c$dir no_recurrence 892cdir$ ivdep 893 do 10 j=1,i 894 ij=ij+1 895 sq(ii+j)=t(ij) 896 sq(jj+i)=t(ij) 89710 jj=jj+n 89820 ii=ii+n 899 return 900 end 901 subroutine mxma(a,mcola,mrowa,b,mcolb,mrowb, 902 1 r,mcolr,mrowr,ncol,nlink,nrow) 903c this code should run at about 34 mflops for matrices 904c of dimensions between 50 and 500 on a 25 mhz rs6000 905 implicit real*8 (a-h,o-z) 906 dimension a(*),b(*),r(*) 907 parameter (nb=60) 908c written based on tips by ron bell, ibm united kingdom, ltd. 909c seeibm document no. gg24-3611 910c note that this code, in spite of all efforts, does not reach 911c the numerical performance (43 mflops/s) claimed in the above 912c publication. it is assumed now that the performance figures 913c of bell refer to the inner loop only and do not reflect 914c the overhead. the maximum performance of this code is about 915c 35 mflops/s 916 dimension rr(nb,nb),aa(nb,nb),bb(nb,nb) 917c performs r=a*b; the actual dimensions are naxma,maxmb,naxmb 918c the addressing function is: r(i,j)=r((i-1)*mcolr+(j-1)*mrowr+1) 919c a(i,k)=a((i-1)*mcola+(k-1)*mrowa+1) 920c b(k,j)=b((k-1)*mcolb+(j-1)*mrowb+1) 921c good example of obscure addressing in fortran 922c please do not write code like this. unfortunately, this piece of 923c code is widely used on the crays 924c the buffer size has been roughly optimized for the apollo dn10k 925 ir=1 926 do 3 j=1,nrow 927 irr=ir 928 do 2 i=1,ncol 929c ncol is the number of rows. the dutch mind is perplexing 930 r(irr)=0.0d0 931 irr=irr+mcolr 932 2 continue 933 ir=ir+mrowr 934 3 continue 935 do 1400 ii=1,ncol,nb 936 ie=min0(ii+nb-1,ncol) 937 ie1=ie-ii+1 938 ie2=3*(ie1/3) 939c write(*,*) ie,ie1 940 do 1300 jj=1,nrow,nb 941 je=min0(jj+nb-1,nrow) 942 je1=je-jj+1 943 je2=3*(je1/3) 944c make sure that ie2 and je2 are divisible by 3 for the 945c loop unrolling 946 ir=(ii-1)*mcolr+(jj-1)*mrowr+1 947 do 200 i=1,ie1 948 irr=ir 949 do 100 j=1,je1 950c rr(i,j)=r(irr) 951 rr(i,j)=0.0d0 952 irr=irr+mrowr 953 100 continue 954 ir=ir+mcolr 955 200 continue 956 do 1000 kk=1,nlink,nb 957 ke=min0(kk+nb-1,nlink) 958 ke1=ke-kk+1 959 ia=(ii-1)*mcola+(kk-1)*mrowa+1 960 do 400 i=1,ie1 961 iaa=ia 962 do 300 k=1,ke1 963 aa(k,i)=a(iaa) 964 iaa=iaa+mrowa 965 300 continue 966 ia=ia+mcola 967 400 continue 968 ib=(kk-1)*mcolb+(jj-1)*mrowb+1 969 do 600 j=1,je1 970 ibb=ib 971 do 500 k=1,ke1 972 bb(k,j)=b(ibb) 973 ibb=ibb+mcolb 974 500 continue 975 ib=ib+mrowb 976 600 continue 977 do 900 i=1,ie2,3 978 do 800 j=1,je2,3 979 s00=rr(i,j) 980 s10=rr(i+1,j) 981 s20=rr(i+2,j) 982 s01=rr(i,j+1) 983 s11=rr(i+1,j+1) 984 s21=rr(i+2,j+1) 985 s02=rr(i,j+2) 986 s12=rr(i+1,j+2) 987 s22=rr(i+2,j+2) 988 do 700 k=1,ke1 989 s00=s00+aa(k,i)*bb(k,j) 990 s01=s01+aa(k,i)*bb(k,j+1) 991 s02=s02+aa(k,i)*bb(k,j+2) 992 s10=s10+aa(k,i+1)*bb(k,j) 993 s11=s11+aa(k,i+1)*bb(k,j+1) 994 s12=s12+aa(k,i+1)*bb(k,j+2) 995 s20=s20+aa(k,i+2)*bb(k,j) 996 s21=s21+aa(k,i+2)*bb(k,j+1) 997 s22=s22+aa(k,i+2)*bb(k,j+2) 998 700 continue 999 rr(i,j)=s00 1000 rr(i+1,j)=s10 1001 rr(i+2,j)=s20 1002 rr(i,j+1)=s01 1003 rr(i+1,j+1)=s11 1004 rr(i+2,j+1)=s21 1005 rr(i,j+2)=s02 1006 rr(i+1,j+2)=s12 1007 rr(i+2,j+2)=s22 1008 800 continue 1009 do 820 j=je2+1,je1 1010 s00=rr(i,j) 1011 s10=rr(i+1,j) 1012 s20=rr(i+2,j) 1013 do 810 k=1,ke1 1014 s00=s00+aa(k,i)*bb(k,j) 1015 s10=s10+aa(k,i+1)*bb(k,j) 1016 s20=s20+aa(k,i+2)*bb(k,j) 1017 810 continue 1018 rr(i,j)=s00 1019 rr(i+1,j)=s10 1020 rr(i+2,j)=s20 1021 820 continue 1022 900 continue 1023 do 950 i=ie2+1,ie1 1024 do 940 j=1,je1 1025 do 930 k=1,ke1 1026 rr(i,j)=rr(i,j)+aa(k,i)*bb(k,j) 1027 930 continue 1028 940 continue 1029 950 continue 1030 1000 continue 1031 ir=(ii-1)*mcolr+(jj-1)*mrowr+1 1032 do 1200 i=1,ie1 1033 irr=ir 1034 do 1100 j=1,je1 1035 r(irr)=r(irr)+rr(i,j) 1036 irr=irr+mrowr 1037 1100 continue 1038 ir=ir+mcolr 1039 1200 continue 1040 1300 continue 1041 1400 continue 1042 end 1043 subroutine nerror(noer,routine,message,n1,n2) 1044#include "errquit.fh" 1045 character*(*) routine 1046 character*(*) message 1047 common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,n 1048 1pl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),icode(20 1049 20),inpf,ioutf 1050 write(iout,*) 'Error no. ',noer,' in ',routine,' ',message,n1,n2 1051 write(icond,*) 'Error no. ',noer,' in ',routine,' ',message,n1,n2 1052c100 format(' Error No.',i4,' in ',a20,2x,/,a80,/,' variables ',2i6) 1053 call errquit('texas: nerror called', 0, INT_ERR) 1054* stop 20 1055 end 1056 subroutine txs_message(routine,messag1,n1,n2) 1057 character*(*) routine 1058 character*(*) messag1 1059 common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,n 1060 1pl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),icode(20 1061 20),inpf,ioutf 1062 write(iout,*) 'Message from ',routine,' ',messag1,n1,n2 1063 write(icond,*) 'Message from ',routine,' ',messag1,n1,n2 1064 end 1065 subroutine txs_error 1066#include "errquit.fh" 1067 common /tape/ inp,inp2,iout,ipun,ix,icond,itest,nentry,ltab,ntap,n 1068 1pl(9),nbl(9),nen(9),lentry(9),nam(200),num(200),irec(200),icode(20 1069 20),inpf,ioutf 1070 write(iout,*) ' error found' 1071 write(icond,*) ' error found' 1072* stop 10 1073 call errquit('texas: txs_error called', 0, INT_ERR) 1074 end 1075c================================================================== 1076 subroutine dxypz(n,dx,incx, dy,incy, dz,incz) 1077c 1078c vector_x*vector_y + vector_z ===> vector_z 1079c 1080c modification of the daxpy blas routine. (KW) 1081c 1082c 1083 double precision dx(*),dy(*),dz(*) 1084 integer i,incx,incy,incz, ix,iy,iz, m,mp1,n 1085c 1086 if(n.le.0)return 1087 if(incx.eq.1.and.incy.eq.1.and.incz.eq.1)go to 20 1088c 1089c code for unequal increments or equal increments 1090c not equal to 1 1091c 1092 ix = 1 1093 iy = 1 1094 iz = 1 1095 if(incx.lt.0) ix= (-n+1)*incx + 1 1096 if(incy.lt.0) iy= (-n+1)*incy + 1 1097 if(incz.lt.0) iz =(-n+1)*incz + 1 1098c 1099 do 10 i = 1,n 1100 dz(iz) = dz(iz) + dy(iy)*dx(ix) 1101 ix = ix + incx 1102 iy = iy + incy 1103 iz = iz + incz 1104 10 continue 1105c 1106 return 1107c 1108c code for both increments equal to 1 1109c 1110c 1111c clean-up loop 1112c 1113 20 m = mod(n,4) 1114 if( m .eq. 0 ) go to 40 1115 do 30 i = 1,m 1116 dz(i) = dz(i) + dy(i)*dx(i) 1117 30 continue 1118 if( n .lt. 4 ) return 1119 40 mp1 = m + 1 1120 do 50 i0= mp1,n,4 1121 i1=i0+1 1122 i2=i1+1 1123 i3=i2+1 1124 dz(i0) = dz(i0) + dy(i0)*dx(i0) 1125 dz(i1) = dz(i1) + dy(i1)*dx(i1) 1126 dz(i2) = dz(i2) + dy(i2)*dx(i2) 1127 dz(i3) = dz(i3) + dy(i3)*dx(i3) 1128 50 continue 1129 return 1130 end 1131c================================================================== 1132 subroutine dxyz(n,dx,incx, dy,incy, dz,incz) 1133c 1134c vector_x*vector_y ===> vector_z 1135c 1136c modification of the daxpy blas routine. (KW) 1137c 1138c 1139 double precision dx(*),dy(*),dz(*) 1140 integer i,incx,incy,incz, ix,iy,iz, m,mp1,n 1141c 1142 if(n.le.0)return 1143 if(incx.eq.1.and.incy.eq.1.and.incz.eq.1)go to 20 1144c 1145c code for unequal increments or equal increments 1146c not equal to 1 1147c 1148 ix = 1 1149 iy = 1 1150 iz = 1 1151 if(incx.lt.0) ix= (-n+1)*incx + 1 1152 if(incy.lt.0) iy= (-n+1)*incy + 1 1153 if(incz.lt.0) iz =(-n+1)*incz + 1 1154c 1155 do 10 i = 1,n 1156 dz(iz) = dy(iy)*dx(ix) 1157 ix = ix + incx 1158 iy = iy + incy 1159 iz = iz + incz 1160 10 continue 1161c 1162 return 1163c 1164c code for both increments equal to 1 1165c 1166c 1167c clean-up loop 1168c 1169 20 m = mod(n,4) 1170 if( m .eq. 0 ) go to 40 1171 do 30 i = 1,m 1172 dz(i) = dy(i)*dx(i) 1173 30 continue 1174 if( n .lt. 4 ) return 1175 40 mp1 = m + 1 1176 do 50 i0= mp1,n,4 1177 i1=i0+1 1178 i2=i1+1 1179 i3=i2+1 1180 dz(i0) = dy(i0)*dx(i0) 1181 dz(i1) = dy(i1)*dx(i1) 1182 dz(i2) = dy(i2)*dx(i2) 1183 dz(i3) = dy(i3)*dx(i3) 1184 50 continue 1185 return 1186 end 1187c================================================================== 1188 subroutine dxpy(n,dx,incx, dy,incy) 1189c 1190c vector_x+vector_y ===> vector_y 1191c 1192c modification of the daxpy blas routine. (KW) 1193c 1194c 1195 double precision dx(*),dy(*) 1196 integer i,incx,incy, ix,iy, m,mp1,n 1197c 1198 if(n.le.0)return 1199 if(incx.eq.1.and.incy.eq.1)go to 20 1200c 1201c code for unequal increments or equal increments 1202c not equal to 1 1203c 1204 ix = 1 1205 iy = 1 1206 if(incx.lt.0) ix= (-n+1)*incx + 1 1207 if(incy.lt.0) iy= (-n+1)*incy + 1 1208c 1209 do 10 i = 1,n 1210 dy(iy) = dy(iy) + dx(ix) 1211 ix = ix + incx 1212 iy = iy + incy 1213 10 continue 1214c 1215 return 1216c 1217c code for both increments equal to 1 1218c 1219c 1220c clean-up loop 1221c 1222 20 m = mod(n,4) 1223 if( m .eq. 0 ) go to 40 1224 do 30 i = 1,m 1225 dy(i) = dy(i) + dx(i) 1226 30 continue 1227 if( n .lt. 4 ) return 1228 40 mp1 = m + 1 1229 do 50 i0= mp1,n,4 1230 i1=i0+1 1231 i2=i1+1 1232 i3=i2+1 1233 dy(i0) = dy(i0) + dx(i0) 1234 dy(i1) = dy(i1) + dx(i1) 1235 dy(i2) = dy(i2) + dx(i2) 1236 dy(i3) = dy(i3) + dx(i3) 1237 50 continue 1238 return 1239 end 1240c================================================================== 1241 subroutine get_ij_half(ij, i, j) 1242c 1243c extracts indeces i & j from common index ij=i*(i-1)/2 + j 1244c 1245 implicit none 1246 integer ij, i, j 1247 intrinsic sqrt, float 1248c 1249 i = sqrt(float(ij + ij)) 1250 j = ij - i*(i-1)/2 1251 if (i .lt. j) then 1252 i = i + 1 1253 j = ij - i*(i-1)/2 1254 endif 1255c 1256 end 1257c----------------------------------------------------------------- 1258 subroutine get_ij_full(ij,nj, i, j) 1259c 1260c extracts indeces i and j from common index ij=(i-1)*nj + j 1261c 1262 implicit none 1263 integer ij,nj, i, j 1264c 1265 i=ij/nj+1 1266 j=ij-(i-1)*nj 1267c 1268 if(j.eq.0) then 1269 i=i-1 1270 j=ij-(i-1)*nj 1271 endif 1272c 1273 end 1274c----------------------------------------------------------------- 1275