contrib/Sparskit/unary.f

c $Id: unary.f,v 1.1 2008-04-11 06:01:06 geuzaine Exp $
c----------------------------------------------------------------------c
c                          S P A R S K I T                             c
c----------------------------------------------------------------------c
c                     UNARY SUBROUTINES MODULE                         c
c----------------------------------------------------------------------c
c contents:                                                            c
c----------                                                            c
c submat : extracts a submatrix from a sparse matrix.                  c
c filter : filters elements from a matrix according to their magnitude.c
c filterm: same as above, but for the MSR format                       c
c csort  : sorts the elements in increasing order of columns           c
c clncsr : clean up the CSR format matrix, remove duplicate entry, etc c
c transp : in-place transposition routine (see also csrcsc in formats) c
c copmat : copy of a matrix into another matrix (both stored csr)      c
c msrcop : copies a matrix in MSR format into a matrix in MSR format   c
c getelm : returns a(i,j) for any (i,j) from a CSR-stored matrix.      c
c getdia : extracts a specified diagonal from a matrix.                c
c getl   : extracts lower triangular part                              c
c getu   : extracts upper triangular part                              c
c levels : gets the level scheduling structure for lower triangular    c
c          matrices.                                                   c
c amask  : extracts     C = A mask M                                   c
c rperm  : permutes the rows of a matrix (B = P A)                     c
c cperm  : permutes the columns of a matrix (B = A Q)                  c
c dperm  : permutes both the rows and columns of a matrix (B = P A Q ) c
c dperm1 : general extraction routine (extracts arbitrary rows)        c
c dperm2 : general submatrix permutation/extraction routine            c
c dmperm : symmetric permutation of row and column (B=PAP') in MSR fmt c
c dvperm : permutes a real vector (in-place)                           c
c ivperm : permutes an integer vector (in-place)                       c
c retmx  : returns the max absolute value in each row of the matrix    c
c diapos : returns the positions of the diagonal elements in A.        c
c extbdg : extracts the main diagonal blocks of a matrix.              c
c getbwd : returns the bandwidth information on a matrix.              c
c blkfnd : finds the block-size of a matrix.                           c
c blkchk : checks whether a given integer is the block size of A.      c
c infdia : obtains information on the diagonals of A.                  c
c amubdg : gets number of nonzeros in each row of A*B (as well as NNZ) c
c aplbdg : gets number of nonzeros in each row of A+B (as well as NNZ) c
c rnrms  : computes the norms of the rows of A                         c
c cnrms  : computes the norms of the columns of A                      c
c roscal : scales the rows of a matrix by their norms.                 c
c coscal : scales the columns of a matrix by their norms.              c
c addblk : Adds a matrix B into a block of A.                          c
c get1up : Collects the first elements of each row of the upper        c
c          triangular portion of the matrix.                           c
c xtrows : extracts given rows from a matrix in CSR format.            c
c csrkvstr:  Finds block row partitioning of matrix in CSR format      c
c csrkvstc:  Finds block column partitioning of matrix in CSR format   c
c kvstmerge: Merges block partitionings, for conformal row/col pattern c
c----------------------------------------------------------------------c
      subroutine submat (n,job,i1,i2,j1,j2,a,ja,ia,nr,nc,ao,jao,iao)
c-----------------------------------------------------------------------
c Extracts the submatrix A(i1:i2,j1:j2) of a CSR matrix and stores it
c in ao, jao, iao.
c The extraction may be done in place: ao,jao,iao may be a,ja,ia.
c-----------------------------------------------------------------------
c on input
c---------
c n     = row dimension of the matrix (not referenced by the code).
c job   = if job .eq. 1 the values and the column indices are
c         extracted; otherwise only the structure (jao, iao) is
c         produced and a, ao are not referenced.
c i1,i2 = first and last row to extract (i2 .ge. i1).
c j1,j2 = first and last column to extract (j2 .ge. j1).
c         * The ranges are not checked against the matrix size.
c a,ja,ia = input matrix in compressed sparse row format.
c
c on output
c----------
c nr    = number of rows of the submatrix    (i2-i1+1)
c nc    = number of columns of the submatrix (j2-j1+1)
c         * If nr or nc is not positive the routine returns at once
c           and ao, jao, iao are left untouched.
c ao,jao,iao = extracted matrix in CSR format. Column indices are
c         shifted so that column j1 of A becomes column 1, and
c         iao(1) = 1.
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
      implicit none
      integer n,job,i1,i2,j1,j2,nr,nc
      integer ia(*),ja(*),jao(*),iao(*)
      real*8 a(*),ao(*)
c     local variables
      integer irow, iout, kk, kbeg, kend, jcol, nkept
      logical wantv
c
      nr = i2 - i1 + 1
      nc = j2 - j1 + 1
      if (nr .lt. 1 .or. nc .lt. 1) return
c
      wantv = job .eq. 1
      nkept = 0
      iout = 0
c
c     walk through the selected rows, keeping entries whose column
c     lies in j1..j2
c
      do irow = i1, i2
         iout = iout + 1
c        read the row bounds before iao is written, so that the
c        routine still works when iao and ia are the same array
         kbeg = ia(irow)
         kend = ia(irow+1) - 1
         iao(iout) = nkept + 1
         do kk = kbeg, kend
            jcol = ja(kk)
            if (jcol .ge. j1 .and. jcol .le. j2) then
               nkept = nkept + 1
               if (wantv) ao(nkept) = a(kk)
               jao(nkept) = jcol - j1 + 1
            endif
         end do
      end do
      iao(nr+1) = nkept + 1
      return
c------------end-of submat----------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine filter(n,job,drptol,a,ja,ia,b,jb,ib,len,ierr)
      real*8 a(*),b(*),drptol
      integer ja(*),jb(*),ia(*),ib(*),n,job,len,ierr
c-----------------------------------------------------------------------
c     Copies the CSR matrix A into B, leaving out the entries whose
c     absolute value is small. The input parameter job selects the
c     definition of small.
c-----------------------------------------------------------------------
c on entry:
c---------
c  n      = integer. row dimension of the matrix.
c  job    = integer. selects the threshold used in each row:
c          job = 2
c              threshold = drptol * (Euclidean norm of the row).
c          job = 3
c              threshold = drptol * (largest absolute value in
c              the row).
c          any other value (in particular job = 1)
c              threshold = drptol.
c          An entry is kept only if its absolute value is strictly
c          greater than the threshold.
c  drptol = real*8. drop tolerance.
c  a,ja,ia = input matrix in compressed sparse row format.
c  len    = integer. space available in arrays b and jb.
c
c on return:
c----------
c  b,jb,ib = filtered matrix in compressed sparse row format.
c  ierr   = integer error indicator.
c          ierr .eq. 0 : normal return.
c          ierr .gt. 0 : not enough space in b and jb to store the
c                        result. ierr is the row being processed
c                        when the space ran out; ib(n+1) and the
c                        ib entries of later rows are not set.
c note:
c------ The routine may be used in place (b,jb,ib can be the same
c       as a,ja,ia, in which case the input is overwritten).
c----------------------------------------------------------------------c
c           contributed by David Day,  Sep 19, 1989.                   c
c----------------------------------------------------------------------c
c local variables
      real*8 norm,loctol
      integer inew,row,k,k1,k2
c
c     ierr must be defined on the normal return path as well
c
      ierr = 0
      inew = 1
      do 10 row = 1,n
c        row bounds are read before ib(row) is written (in-place use)
         k1 = ia(row)
         k2 = ia(row+1) - 1
         ib(row) = inew
c
c        scaling factor of the drop tolerance for this row
c
         if (job .eq. 2) then
            norm = 0.0d0
            do 22 k = k1,k2
               norm = norm + a(k) * a(k)
 22         continue
            norm = sqrt(norm)
         else if (job .eq. 3) then
            norm = 0.0d0
            do 23 k = k1,k2
               if (abs(a(k)) .gt. norm) norm = abs(a(k))
 23         continue
         else
            norm = 1.0d0
         endif
         loctol = drptol * norm
c
c        copy the entries that are large enough
c
         do 30 k = k1,k2
            if (abs(a(k)) .gt. loctol) then
               if (inew .gt. len) then
                  ierr = row
                  return
               endif
               b(inew) = a(k)
               jb(inew) = ja(k)
               inew = inew + 1
            endif
 30      continue
 10   continue
      ib(n+1) = inew
      return
c--------------------end-of-filter -------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine filterm (n,job,drop,a,ja,b,jb,len,ierr)
      real*8 a(*),b(*),drop
      integer ja(*),jb(*),n,job,len,ierr
c-----------------------------------------------------------------------
c     Copies the MSR matrix A into B, leaving out the off-diagonal
c     entries whose absolute value is small. Same as filter but for
c     the Modified Sparse Row format. Diagonal entries are always
c     kept.
c-----------------------------------------------------------------------
c on entry:
c---------
c  n      = integer. row dimension of the matrix.
c  job    = integer. selects the threshold used in each row:
c          job = 2
c              threshold = drop * (Euclidean norm of the row,
c              diagonal entry included).
c          job = 3
c              threshold = drop * (largest absolute value in the
c              row, diagonal entry included).
c          any other value (in particular job = 1)
c              threshold = drop.
c          An off-diagonal entry is kept only if its absolute
c          value is strictly greater than the threshold.
c  drop   = real*8. drop tolerance.
c  a,ja   = input matrix in Modified Sparse Row format.
c  len    = integer. the amount of space in arrays b and jb.
c           Positions 1..n+1 are written without being checked
c           against len.
c
c on return:
c----------
c  b,jb   = resulting matrix in Modified Sparse Row format.
c           b(1:n) holds the diagonal copied from a(1:n).
c  ierr   = integer error indicator.
c          ierr .eq. 0 : normal return.
c          ierr .gt. 0 : not enough space in b and jb to store the
c                        result. ierr is the row being processed
c                        when the space ran out; jb(n+1) and the
c                        pointers of later rows are not set.
c note:
c------ The routine may be used in place (b,jb can be the same as
c       a,ja, in which case the input is overwritten).
c----------------------------------------------------------------------c
c           contributed by David Day,  Sep 19, 1989.                   c
c----------------------------------------------------------------------c
c local variables
c
      real*8 norm,loctol
      integer inew,row,k,k1,k2
c
c     ierr must be defined on the normal return path as well
c
      ierr = 0
c     off-diagonal entries of an MSR matrix start at position n+2
      inew = n+2
      do 10 row = 1,n
c        row bounds are read before jb(row) is written (in-place use)
         k1 = ja(row)
         k2 = ja(row+1) - 1
         jb(row) = inew
c        the diagonal is never dropped; copy it so that b is a
c        complete MSR matrix even when b and a are distinct arrays
         b(row) = a(row)
c
c        scaling factor of the drop tolerance for this row
c
         if (job .eq. 2) then
            norm = a(row)**2
            do 22 k = k1,k2
               norm = norm + a(k) * a(k)
 22         continue
            norm = sqrt(norm)
         else if (job .eq. 3) then
            norm = abs(a(row))
            do 23 k = k1,k2
               norm = max(abs(a(k)),norm)
 23         continue
         else
            norm = 1.0d0
         endif
         loctol = drop * norm
c
c        copy the off-diagonal entries that are large enough
c
         do 30 k = k1,k2
            if (abs(a(k)) .gt. loctol) then
               if (inew .gt. len) then
                  ierr = row
                  return
               endif
               b(inew) = a(k)
               jb(inew) = ja(k)
               inew = inew + 1
            endif
 30      continue
 10   continue
      jb(n+1) = inew
      return
c--------------------end-of-filterm-------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine csort (n,a,ja,ia,iwork,values)
      logical values
      integer n, ja(*), ia(n+1), iwork(*)
      real*8 a(*)
c-----------------------------------------------------------------------
c This routine sorts the elements of  a matrix (stored in Compressed
c Sparse Row Format) in increasing order of their column indices within
c each row. It uses a form of bucket sort with a cost of O(nnz) where
c nnz = number of nonzero elements.
c requires an integer work array of length max ( n+1, 2*nnz ).
c-----------------------------------------------------------------------
c on entry:
c---------
c n     = the row dimension of the matrix
c a     = the matrix A in compressed sparse row format.
c ja    = the array of column indices of the elements in array a.
c ia    = the array of pointers to the rows.
c iwork = integer work array of length max ( n+1, 2*nnz )
c         where nnz = ia(n+1)-ia(1) .
c values= logical indicating whether or not the real values a(*) must
c         also be permuted. if (.not. values) then the array a is not
c         touched by csort and can be a dummy array.
c
c on return:
c----------
c the matrix stored in the structure a, ja, ia is permuted in such a
c way that the column indices are in increasing order within each row.
c iwork(1:nnz) contains the permutation used  to rearrange the elements.
c
c NOTE(review): only iwork(1:n+1) is zeroed before the column counts
c   are accumulated in iwork(ja(k)+1), so every column index is
c   assumed to be .le. n (e.g. square matrix) -- confirm with callers.
c NOTE(review): the column pointers kept in iwork(1:n+1) and the
c   position list written to iwork(nnz+1:2*nnz) share storage; this
c   looks safe only when no column index exceeds nnz -- to be
c   confirmed.
c NOTE(review): iwork(k) and iwork(ko) are addressed with absolute
c   positions taken from ia, which presumably requires ia(1) = 1
c   -- to be confirmed.
c-----------------------------------------------------------------------
c Y. Saad - Feb. 1, 1991.
c-----------------------------------------------------------------------
c local variables
c (ko and irow used below are not declared; they are implicitly
c  typed integers)
      integer i, k, j, ifirst, nnz, next
c
c count the number of elements in each column
c (the count for column j is accumulated in iwork(j+1))
c
      do 1 i=1,n+1
         iwork(i) = 0
 1    continue
      do 3 i=1, n
         do 2 k=ia(i), ia(i+1)-1
            j = ja(k)+1
            iwork(j) = iwork(j)+1
 2       continue
 3    continue
c
c compute pointers from lengths.
c (running sum: iwork(j) becomes the rank of the first element of
c  column j in the column-ordered list)
c
      iwork(1) = 1
      do 4 i=1,n
         iwork(i+1) = iwork(i) + iwork(i+1)
 4    continue
c
c get the positions of the nonzero elements in order of columns.
c (iwork(nnz+r) = position in a,ja of the element of rank r;
c  iwork(j) is advanced as the elements of column j are placed)
c
      ifirst = ia(1)
      nnz = ia(n+1)-ifirst
      do 5 i=1,n
         do 51 k=ia(i),ia(i+1)-1
            j = ja(k)
            next = iwork(j)
            iwork(nnz+next) = k
            iwork(j) = next+1
 51      continue
 5    continue
c
c convert to coordinate format
c (iwork(k) = row index of the element stored at position k; this
c  overwrites the column pointers, which are no longer needed)
c
      do 6 i=1, n
         do 61 k=ia(i), ia(i+1)-1
            iwork(k) = i
 61      continue
 6    continue
c
c loop to find permutation: for each element find the correct
c position in (sorted) arrays a, ja. Record this in iwork.
c (elements are visited in column order, so within each row they
c  are assigned consecutive positions in increasing column order)
c
      do 7 k=1, nnz
         ko = iwork(nnz+k)
         irow = iwork(ko)
         next = ia(irow)
c
c the current element should go in next position in row. iwork
c records this position.
c (ia(irow) is used as a running fill pointer; after this loop
c  ia(i) holds the start of row i+1)
c
         iwork(ko) = next
         ia(irow)  = next+1
 7       continue
c
c perform an in-place permutation of the  arrays.
c (ivperm and dvperm are defined elsewhere; they are passed the
c  arrays starting at position ifirst and the permutation iwork)
c
         call ivperm (nnz, ja(ifirst), iwork)
         if (values) call dvperm (nnz, a(ifirst), iwork)
c
c reshift the pointers of the original matrix back.
c (undoes the advance of ia made by the fill-pointer loop above)
c
      do 8 i=n,1,-1
         ia(i+1) = ia(i)
 8    continue
      ia(1) = ifirst
c
      return
c---------------end-of-csort--------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine clncsr(job,value2,nrow,a,ja,ia,indu,iwk)
c     .. Scalar Arguments ..
      integer job, nrow, value2
c     ..
c     .. Array Arguments ..
      integer ia(nrow+1),indu(nrow),iwk(nrow+1),ja(*)
      real*8  a(*)
c     ..
c
c     This routine cleans up a CSR matrix in place:
c     -- removes duplicate entries (and zero entries when the
c        matrix has values),
c     -- optionally reorders each row so that the entries with
c        column index < row index come first, followed by the
c        entries with column index >= row index,
c     -- optionally sorts each of these two parts by increasing
c        column index.
c
c     On entry:
c
c     job   = options
c         0 -- nothing is done (also for any job < 0)
c         1 -- eliminate duplicate entries, zero entries.
c         2 -- eliminate duplicate entries and perform partial
c              ordering.
c         3 -- eliminate duplicate entries, sort the entries in
c              the increasing order of column indices.
c
c     value2  -- 0 the matrix is pattern only (a is not touched)
c                otherwise the matrix has values too; duplicate
c                entries are summed and entries whose stored
c                value is zero are dropped.
c     nrow    -- row dimension of the matrix
c     a,ja,ia -- input matrix in CSR format.
c                INDU is indexed by column index during the
c                duplicate search, so every column index must be
c                .le. nrow.
c
c     On return:
c     a,ja,ia -- cleaned matrix
c     indu    -- if job > 1, indu(i) is the position in a,ja of the
c                first entry of row i whose column index is >= i
c                (diagonal included). indu(i) = ia(i+1) if row i
c                has no such entry, in particular if it is empty.
c                if job = 1, indu is set to zero.
c
c     Work space:
c     iwk     -- integer work space of size nrow+1
c
c     .. Local Scalars ..
      integer i,j,k,ko,ipos,kfirst,klast
      real*8  tmp
c     ..
c
      if (job.le.0) return
c
c     .. eliminate duplicate entries --
c     array INDU is used as marker for existing indices, it is also
c     the location of the entry.
c     IWK is used to store the old IA array.
c     matrix is copied to squeeze out the space taken by the
c     duplicated entries.
c
      do 90 i = 1, nrow
         indu(i) = 0
         iwk(i) = ia(i)
 90   continue
      iwk(nrow+1) = ia(nrow+1)
      k = 1
      do 120 i = 1, nrow
         ia(i) = k
         ipos = iwk(i)
         klast = iwk(i+1)
 100     if (ipos.lt.klast) then
            j = ja(ipos)
            if (indu(j).eq.0) then
c     .. new entry ..
               if (value2.ne.0) then
                  if (a(ipos) .ne. 0.0D0) then
                     indu(j) = k
                     ja(k) = ja(ipos)
                     a(k) = a(ipos)
                     k = k + 1
                  endif
               else
                  indu(j) = k
                  ja(k) = ja(ipos)
                  k = k + 1
               endif
            else if (value2.ne.0) then
c     .. duplicate entry: accumulate into the first occurrence ..
               a(indu(j)) = a(indu(j)) + a(ipos)
            endif
            ipos = ipos + 1
            go to 100
         endif
c     .. remove marks before working on the next row ..
         do 110 ipos = ia(i), k - 1
            indu(ja(ipos)) = 0
 110     continue
 120  continue
      ia(nrow+1) = k
      if (job.le.1) return
c
c     .. partial ordering ..
c     split each row into the entries with column < i and the
c     entries with column >= i; INDU points to the beginning of
c     the second part.
c
      do 140 i = 1, nrow
         klast = ia(i+1) - 1
         kfirst = ia(i)
         if (klast.lt.kfirst) then
c     .. empty row: there is nothing to split, and ja(klast) would
c        be an entry of the previous row (or ja(0)) ..
            indu(i) = kfirst
            go to 140
         endif
 130     if (klast.gt.kfirst) then
            if (ja(klast).lt.i .and. ja(kfirst).ge.i) then
c     .. swap klast with kfirst ..
               j = ja(klast)
               ja(klast) = ja(kfirst)
               ja(kfirst) = j
               if (value2.ne.0) then
                  tmp = a(klast)
                  a(klast) = a(kfirst)
                  a(kfirst) = tmp
               endif
            endif
            if (ja(klast).ge.i)
     &         klast = klast - 1
            if (ja(kfirst).lt.i)
     &         kfirst = kfirst + 1
            go to 130
         endif
c
c     .. klast now sits on the boundary between the two parts ..
         if (ja(klast).lt.i) then
            indu(i) = klast + 1
         else
            indu(i) = klast
         endif
 140  continue
      if (job.le.2) return
c
c     .. order the entries according to column indices
c     bubble-sort is used, separately on each of the two parts
c
      do 190 i = 1, nrow
c     .. part with column index < i ..
         do 160 ipos = ia(i), indu(i)-1
            do 150 j = indu(i)-1, ipos+1, -1
               k = j - 1
               if (ja(k).gt.ja(j)) then
                  ko = ja(k)
                  ja(k) = ja(j)
                  ja(j) = ko
                  if (value2.ne.0) then
                     tmp = a(k)
                     a(k) = a(j)
                     a(j) = tmp
                  endif
               endif
 150        continue
 160     continue
c     .. part with column index >= i ..
         do 180 ipos = indu(i), ia(i+1)-1
            do 170 j = ia(i+1)-1, ipos+1, -1
               k = j - 1
               if (ja(k).gt.ja(j)) then
                  ko = ja(k)
                  ja(k) = ja(j)
                  ja(j) = ko
                  if (value2.ne.0) then
                     tmp = a(k)
                     a(k) = a(j)
                     a(j) = tmp
                  endif
               endif
 170        continue
 180     continue
 190  continue
      return
c---- end of clncsr ----------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine copmat (nrow,a,ja,ia,ao,jao,iao,ipos,job)
c----------------------------------------------------------------------
c Copies the CSR matrix a, ja, ia into the CSR matrix ao, jao, iao,
c placing the first element at position ipos of ao, jao.
c----------------------------------------------------------------------
c on entry:
c---------
c nrow  = row dimension of the matrix
c a,ja,ia = input matrix in compressed sparse row format.
c ipos  = integer. position in the arrays ao, jao where the first
c         element is to be copied. Thus iao(1) = ipos on return.
c job   = job indicator. if (job .ne. 1) the values are not copied
c         (only the pattern jao, iao is produced; a and ao are not
c         referenced).
c
c on return:
c----------
c ao,jao,iao = output matrix containing the same data as a, ja, ia,
c         with every pointer shifted by ipos-ia(1).
c-----------------------------------------------------------------------
c           Y. Saad, March 1990.
c-----------------------------------------------------------------------
      implicit none
      integer nrow, ipos, job
      integer ia(*), ja(*), jao(*), iao(*)
      real*8 a(*), ao(*)
c     local variables
      integer ishift, irow, kk
c
c     offset between input and output positions
c
      ishift = ipos - ia(1)
c
c     row pointers
c
      do irow = 1, nrow+1
         iao(irow) = ia(irow) + ishift
      end do
c
c     column indices
c
      do kk = ia(1), ia(nrow+1)-1
         jao(kk+ishift) = ja(kk)
      end do
c
c     values, only when requested
c
      if (job .eq. 1) then
         do kk = ia(1), ia(nrow+1)-1
            ao(kk+ishift) = a(kk)
         end do
      endif
c
      return
c--------end-of-copmat -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine msrcop (nrow,a,ja,ao,jao,job)
c----------------------------------------------------------------------
c Copies the MSR matrix a, ja into the MSR matrix ao, jao.
c----------------------------------------------------------------------
c on entry:
c---------
c nrow  = row dimension of the matrix
c a,ja  = input matrix in Modified compressed sparse row format.
c job   = job indicator. Values are not copied if job .ne. 1
c         (a and ao are then not referenced).
c
c on return:
c----------
c ao, jao = output matrix containing the same data as a, ja.
c           Only positions 1..nrow and ja(1)..ja(nrow+1)-1 of ao
c           are written.
c-----------------------------------------------------------------------
c           Y. Saad,
c-----------------------------------------------------------------------
      implicit none
      integer nrow, job
      integer ja(*), jao(*)
      real*8 a(*), ao(*)
c     local variables
      integer irow, kk
c
c     row pointers, stored in the first nrow+1 positions
c
      do irow = 1, nrow+1
         jao(irow) = ja(irow)
      end do
c
c     column indices of the off-diagonal entries
c
      do kk = ja(1), ja(nrow+1)-1
         jao(kk) = ja(kk)
      end do
c
c     values, only when requested: off-diagonal part first, then
c     the diagonal stored in the first nrow positions
c
      if (job .eq. 1) then
         do kk = ja(1), ja(nrow+1)-1
            ao(kk) = a(kk)
         end do
         do kk = 1, nrow
            ao(kk) = a(kk)
         end do
      endif
c
      return
c--------end-of-msrcop -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      double precision function getelm (i,j,a,ja,ia,iadd,sorted)
c-----------------------------------------------------------------------
c     purpose:
c     --------
c     this function returns the element a(i,j) of a matrix a,
c     for any pair (i,j).  the matrix is assumed to be stored
c     in compressed sparse row (csr) format. getelm performs a
c     binary search in the case where it is known that the elements
c     are sorted so that the column indices are in increasing order,
c     and a linear scan of row i otherwise.
c     also returns (in iadd) the address of the element a(i,j) in
c     arrays a and ja when the search is successful (zero if not).
c-----
c     first contributed by noel nachtigal (mit).
c     recoded jan. 20, 1991, by y. saad [in particular
c     added handling of the non-sorted case + the iadd output]
c-----------------------------------------------------------------------
c     parameters:
c     -----------
c on entry:
c----------
c     i      = the row index of the element sought (input).
c     j      = the column index of the element sought (input).
c     a      = the matrix a in compressed sparse row format (input).
c     ja     = the array of column indices (input).
c     ia     = the array of pointers to the rows' data (input).
c     sorted = logical indicating whether the matrix is known to
c              have its column indices sorted in increasing order
c              (sorted=.true.) or not (sorted=.false.).
c              (input).
c on return:
c-----------
c     getelm = value of a(i,j), 0.0d0 if the element is not stored.
c     iadd   = address of element a(i,j) in arrays a, ja if found,
c              zero if not found. (output)
c
c     note: the inputs i and j are not checked for validity.
c           only positions ia(i) .. ia(i+1)-1 of ja are examined,
c           so an empty row always yields iadd = 0.
c-----------------------------------------------------------------------
c     noel m. nachtigal october 28, 1990 -- youcef saad jan 20, 1991.
c-----------------------------------------------------------------------
      integer i, ia(*), iadd, j, ja(*)
      double precision a(*)
      logical sorted
c
c     local variables.
c
      integer ibeg, iend, imid, k
c
c     initialization
c
      iadd = 0
      getelm = 0.0d0
      ibeg = ia(i)
      iend = ia(i+1)-1
c
      if (.not. sorted) then
c
c     unsorted case: scan the row, exit as soon as a(i,j) is found
c
         do 5  k=ibeg, iend
            if (ja(k) .eq.  j) then
               iadd = k
               goto 20
            endif
 5       continue
      else
c
c     sorted case: binary search. the interval ibeg..iend is tested
c     for emptiness BEFORE ja(imid) is read, so that the search can
c     never look at an entry of a neighbouring row.
c
 10      if (ibeg .le. iend) then
            imid = ( ibeg + iend ) / 2
            if (ja(imid).eq.j) then
               iadd = imid
               goto 20
            endif
c
c     not found yet: keep the half that can still contain j
c
            if (ja(imid).gt.j) then
               iend = imid -1
            else
               ibeg = imid +1
            endif
            goto 10
         endif
      endif
c
 20   if (iadd .ne. 0) getelm = a(iadd)
c
      return
c--------end-of-getelm--------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine getdia (nrow,ncol,job,a,ja,ia,len,diag,idiag,ioff)
c-----------------------------------------------------------------------
c Extracts a given diagonal from a matrix stored in CSR format.
c Optionally (see job) the extracted entries are also removed from
c the matrix, in place.
c-----------------------------------------------------------------------
c The diagonal with offset ioff is returned as a vector of length
c nrow (always):
c
c     diag(i) = a(i,ioff+i) i=1,2,...,nrow
c
c Positions for which column ioff+i falls outside the matrix, or
c for which no entry is stored, are set to zero.
c-----------------------------------------------------------------------
c on entry:
c----------
c nrow  = integer. the row dimension of the matrix a.
c ncol  = integer. the column dimension of the matrix a.
c job   = integer. job indicator.
c         if job .eq. 0 the matrix a, ja, ia is not altered.
c         if job .ne. 0 the entries collected in diag are removed
c         from the matrix. this is done in place.
c a,ja,ia = matrix stored in compressed sparse row format.
c ioff  = integer. offset of the wanted diagonal: the entries
c         a(i,j) with j-i = ioff are extracted, so that ioff = 0
c         means the main diagonal.
c
c on return:
c-----------
c len   = number of stored entries found on the diagonal.
c         (len .le. min(nrow,ncol-ioff)-max(1,1-ioff) + 1 )
c diag  = real*8 array of length nrow containing the diagonal as
c         defined above.
c idiag = integer array of length nrow. idiag(i) is the position
c         in the original arrays a and ja of the entry returned in
c         diag(i); zero means that no entry of row i belongs to
c         the diagonal. If a row holds several entries in the
c         wanted column only the first one is used.
c a,ja,ia = unchanged if job .eq. 0 or len .eq. 0. Otherwise the
c         entries collected in diag are removed and the remaining
c         entries are packed starting at position 1 (the matrix
c         then contains len fewer elements).
c----------------------------------------------------------------------c
c     Y. Saad, sep. 21 1989 - modified and retested Feb 17, 1996.      c
c----------------------------------------------------------------------c
      implicit none
      integer nrow, ncol, job, len, ioff
      integer ia(*), ja(*), idiag(*)
      real*8 diag(*), a(*)
c     local variables
      integer irow, ifirst, ilast, kpos, kend, kskip, knew, kbase
c
c     only rows ifirst..ilast can intersect the wanted diagonal
c
      ifirst = max(0,-ioff) + 1
      ilast  = min(nrow,ncol-ioff)
c
      len = 0
      do irow = 1, nrow
         idiag(irow) = 0
         diag(irow) = 0.0d0
      end do
c
c     extract the diagonal: scan each row until the first entry
c     with column index irow+ioff is met
c
      do irow = ifirst, ilast
         kpos = ia(irow)
         kend = ia(irow+1) - 1
 10      if (kpos .le. kend) then
            if (ja(kpos)-irow .eq. ioff) then
               diag(irow) = a(kpos)
               idiag(irow) = kpos
               len = len + 1
            else
               kpos = kpos + 1
               goto 10
            endif
         endif
      end do
c
      if (job .eq. 0) return
      if (len .eq. 0) return
c
c     remove the extracted entries and pack the structure
c
      knew = 0
      do irow = 1, nrow
         kbase = knew
         kskip = idiag(irow)
         do kpos = ia(irow), ia(irow+1)-1
            if (kpos .ne. kskip) then
               knew = knew + 1
               a(knew) = a(kpos)
               ja(knew) = ja(kpos)
            endif
         end do
c        ia(irow) is rewritten only after the old row was scanned
         ia(irow) = kbase + 1
      end do
c
c     redefine ia(nrow+1)
c
      ia(nrow+1) = knew + 1
      return
c------------end-of-getdia----------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine transp (nrow,ncol,a,ja,ia,iwk,ierr)
      integer nrow, ncol, ia(*), ja(*), iwk(*), ierr
      real*8 a(*)
c------------------------------------------------------------------------
c In-place transposition routine.
c------------------------------------------------------------------------
c this subroutine transposes a matrix stored in compressed sparse row
c format. the transposition is done in place: the arrays a, ja, ia of
c the transpose overwrite the original arrays.
c------------------------------------------------------------------------
c on entry:
c---------
c nrow  = integer. The row dimension of A.
c ncol  = integer. The column dimension of A.
c a     = real array of size nnz (number of nonzero elements in A)
c         containing the nonzero elements.
c ja    = integer array of length nnz containing the column positions
c         of the corresponding elements in a.
c ia    = integer array of size n+1, where n = max(nrow,ncol). On
c         entry ia(k) contains the position in a,ja of the beginning
c         of the k-th row.
c
c iwk   = integer work array of same length as ja.
c
c on return:
c----------
c
c ncol  = actual row dimension of the transpose, i.e. the largest
c         column index found in ja (0 when the matrix has no stored
c         entries). This may be .le. the input value of ncol when
c         the last columns of the input matrix are zero columns.
c         If the largest column index exceeds the input value of
c         ncol, transp returns without transposing and ncol is left
c         unchanged. see ierr.
c a,
c ja,
c ia    = the transposed matrix in compressed sparse row format.
c         The row dimension of a, ja, ia is now ncol.
c
c ierr  = integer error flag. 0 on normal return. If the largest
c         column index found exceeds the input value of ncol, ierr
c         is set to that index and transp quits before modifying
c         any array.
c
c Notes:
c------ 1) If the transposition need not be done in place it is
c          preferable to use the conversion routine csrcsc
c          (see conversion routines in formats).
c       2) the column indices within each row of the output matrix
c          are not sorted in increasing order; use csrcsc if sorted
c          output is wanted.
c       3) a matrix without stored entries (nnz .le. 0) skips the
c          chasing loop altogether, so that a(1), ja(1) and iwk(1)
c          are never referenced in that case.
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c  modified Oct. 11, 1989.                                             c
c----------------------------------------------------------------------c
c local variables
      real*8 t, t1
      integer nnz, jcol, i, j, k, l, init, inext
c
      ierr = 0
      nnz = ia(nrow+1)-1
c
c     determine column dimension (largest column index present)
c
      jcol = 0
      do 1 k=1, nnz
         jcol = max(jcol,ja(k))
 1    continue
      if (jcol .gt. ncol) then
         ierr = jcol
         return
      endif
c
c     convert to coordinate format. use iwk for row indices.
c
      ncol = jcol
c
      do 3 i=1,nrow
         do 2 k=ia(i),ia(i+1)-1
            iwk(k) = i
 2       continue
 3    continue
c
c     find pointer array for transpose: count the entries of each
c     column, then turn the counts into starting positions.
c
      do 35 i=1,ncol+1
         ia(i) = 0
 35   continue
      do 4 k=1,nnz
         i = ja(k)
         ia(i+1) = ia(i+1)+1
 4    continue
      ia(1) = 1
c------------------------------------------------------------------------
      do 44 i=1,ncol
         ia(i+1) = ia(i) + ia(i+1)
 44   continue
c
c     empty matrix: there is nothing to move. Entering the chasing
c     loop would read a(1), ja(1), iwk(1) and index ia with ja(1).
c
      if (nnz .le. 0) goto 70
c
c     loop for a cycle in chasing process.
c     (t,i,j) is the element in hand: value, new row, new column.
c     iwk(k) = -1 flags a position whose original element has
c     already been picked up.
c
      init = 1
      k = 0
 5    t = a(init)
      i = ja(init)
      j = iwk(init)
      iwk(init) = -1
c------------------------------------------------------------------------
 6    k = k+1
c     current row number is i.  determine  where to go.
      l = ia(i)
c     save the chased element.
      t1 = a(l)
      inext = ja(l)
c     then occupy its location.
      a(l)  = t
      ja(l) = j
c     update pointer information for next element to be put in row i.
      ia(i) = l+1
c     determine  next element to be chased
      if (iwk(l) .lt. 0) goto 65
      t = t1
      i = inext
      j = iwk(l)
      iwk(l) = -1
      if (k .lt. nnz) goto 6
      goto 70
c     cycle closed: look for the next position not yet picked up.
 65   init = init+1
      if (init .gt. nnz) goto 70
      if (iwk(init) .lt. 0) goto 65
c     restart chasing --
      goto 5
c
c     each ia(i) was advanced to the start of row i+1 while filling;
c     shift the pointers back by one row.
c
 70   continue
      do 80 i=ncol,1,-1
         ia(i+1) = ia(i)
 80   continue
      ia(1) = 1
c
      return
c------------------end-of-transp ----------------------------------------
c------------------------------------------------------------------------
      end
c------------------------------------------------------------------------
      subroutine getl (n,a,ja,ia,ao,jao,iao)
      integer n, ia(*), ja(*), iao(*), jao(*)
      real*8 a(*), ao(*)
c------------------------------------------------------------------------
c getl copies the lower triangular part (diagonal included) of a
c matrix in compressed sparse row format into ao, jao, iao.
c The output may share storage with the input: ao, jao, iao can be
c the same arrays as a, ja, ia.
c------------------------------------------------------------------------
c on entry:
c----------
c n      = dimension of the matrix a.
c a, ja,
c    ia  = matrix stored in compressed sparse row format.
c
c on return:
c-----------
c ao, jao,
c    iao = lower triangular part of a, stored in compressed sparse
c          row format. The kept entries of a row appear in their
c          input order, except that the diagonal entry (when the
c          row has one) is exchanged with the last kept entry, so
c          that it sits in position iao(i+1)-1.
c
c when ao, jao, iao are the same arrays as a, ja, ia the result
c overwrites the input matrix.
c------------------------------------------------------------------------
c local variables
      real*8 vtmp
      integer nout, rowbeg, dpos, kk, irow, jtmp
c
c     nout counts the entries written to the output so far
c
      nout = 0
      do 20 irow = 1, n
         rowbeg = nout + 1
c        dpos = output position of the diagonal entry, 0 if none yet
         dpos = 0
         do 10 kk = ia(irow), ia(irow+1)-1
            if (ja(kk) .le. irow) then
               nout = nout + 1
               ao(nout) = a(kk)
               jao(nout) = ja(kk)
               if (ja(kk) .eq. irow) dpos = nout
            endif
 10      continue
c
c        move the diagonal entry to the end of the row if needed
c
         if (dpos .ne. 0 .and. dpos .ne. nout) then
            vtmp = ao(dpos)
            ao(dpos) = ao(nout)
            ao(nout) = vtmp
c
            jtmp = jao(dpos)
            jao(dpos) = jao(nout)
            jao(nout) = jtmp
         endif
c        written only now so that an aliased ia(irow) stays intact
c        while the row is being read
         iao(irow) = rowbeg
 20   continue
c     close the pointer array
      iao(n+1) = nout + 1
      return
c----------end-of-getl -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine getu (n,a,ja,ia,ao,jao,iao)
      integer n, ia(*), ja(*), iao(*), jao(*)
      real*8 a(*), ao(*)
c------------------------------------------------------------------------
c getu copies the upper triangular part (diagonal included) of a
c matrix in compressed sparse row format into ao, jao, iao.
c The output may share storage with the input: ao, jao, iao can be
c the same arrays as a, ja, ia.
c------------------------------------------------------------------------
c on entry:
c----------
c n      = dimension of the matrix a.
c a, ja,
c    ia  = matrix stored in compressed sparse row format.
c
c on return:
c-----------
c ao, jao,
c    iao = upper triangular part of a, stored in compressed sparse
c          row format. The kept entries of a row appear in their
c          input order, except that the diagonal entry (when the
c          row has one) is exchanged with the first kept entry, so
c          that it sits in position iao(i), the start of the row.
c
c when ao, jao, iao are the same arrays as a, ja, ia the result
c overwrites the input matrix.
c------------------------------------------------------------------------
c local variables
      real*8 vtmp
      integer nout, rowbeg, dpos, kk, irow, jtmp
c
c     nout counts the entries written to the output so far
c
      nout = 0
      do 20 irow = 1, n
         rowbeg = nout + 1
c        dpos = output position of the diagonal entry, 0 if none yet
         dpos = 0
         do 10 kk = ia(irow), ia(irow+1)-1
            if (ja(kk) .ge. irow) then
               nout = nout + 1
               ao(nout) = a(kk)
               jao(nout) = ja(kk)
               if (ja(kk) .eq. irow) dpos = nout
            endif
 10      continue
c
c        move the diagonal entry to the front of the row if needed
c
         if (dpos .ne. 0 .and. dpos .ne. rowbeg) then
            vtmp = ao(dpos)
            ao(dpos) = ao(rowbeg)
            ao(rowbeg) = vtmp
c
            jtmp = jao(dpos)
            jao(dpos) = jao(rowbeg)
            jao(rowbeg) = jtmp
         endif
c        written only now so that an aliased ia(irow) stays intact
c        while the row is being read
         iao(irow) = rowbeg
 20   continue
c     close the pointer array
      iao(n+1) = nout + 1
      return
c----------end-of-getu -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine levels (n, jal, ial, nlev, lev, ilev, levnum)
      integer jal(*),ial(*), levnum(*), ilev(*), lev(*)
c-----------------------------------------------------------------------
c levels builds the level structure of a lower triangular matrix,
c as used for level scheduling in the parallel solution of
c triangular systems. Both strict lower triangular matrices (e.g.
c unit diagonal not stored) and matrices that store their main
c diagonal are accepted.
c-----------------------------------------------------------------------
c on entry:
c----------
c n        = integer. The row dimension of the matrix
c jal, ial = sparsity pattern of the matrix: row i owns the column
c            indices jal(ial(i)), ..., jal(ial(i+1)-1).
c
c on return:
c-----------
c nlev     = integer. number of levels found
c lev      = integer array of length n containing the level
c            scheduling permutation (row numbers grouped by level).
c ilev     = integer array of length nlev+1. pointer to the
c            beginning of each level in lev: lev(ilev(i)) to
c            lev(ilev(i+1)-1) are the row numbers that belong to
c            level number i. The equations of one level can be
c            solved in parallel once those of all the previous
c            levels have been solved.
c work arrays:
c-------------
c levnum   = integer array of length n (contains the level number
c            of each unknown on return)
c-----------------------------------------------------------------------
c local variables
      integer irow, kk, depth, lv, slot
c
c     all levels start at zero, so an entry on or above the
c     diagonal does not raise the level of the current row
c
      do 10 irow = 1, n
         levnum(irow) = 0
 10   continue
c
c     level of a row = 1 + highest level among the rows it refers to
c
      nlev = 0
      do 30 irow = 1, n
         depth = 0
         do 20 kk = ial(irow), ial(irow+1)-1
            depth = max(depth, levnum(jal(kk)))
 20      continue
         levnum(irow) = depth + 1
         nlev = max(nlev, depth + 1)
 30   continue
c
c     count the rows of each level; the count of level l is kept
c     in ilev(l+1)
c
      do 40 lv = 1, nlev+1
         ilev(lv) = 0
 40   continue
      do 50 irow = 1, n
         slot = levnum(irow) + 1
         ilev(slot) = ilev(slot) + 1
 50   continue
c
c     running sum: ilev(l) becomes the first position of level l
c
      ilev(1) = 1
      do 60 lv = 2, nlev+1
         ilev(lv) = ilev(lv) + ilev(lv-1)
 60   continue
c
c     drop each row into its level, advancing the level pointer
c
      do 70 irow = 1, n
         lv = levnum(irow)
         lev(ilev(lv)) = irow
         ilev(lv) = ilev(lv) + 1
 70   continue
c
c     every pointer now sits at the start of the next level;
c     shift them back by one level
c
      do 80 lv = nlev+1, 2, -1
         ilev(lv) = ilev(lv-1)
 80   continue
      ilev(1) = 1
      return
c----------end-of-levels------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine amask (nrow,ncol,a,ja,ia,jmask,imask,
     *                  c,jc,ic,iw,nzmax,ierr)
c---------------------------------------------------------------------
      real*8 a(*),c(*)
      integer ia(nrow+1),ja(*),jc(*),ic(nrow+1),jmask(*),imask(nrow+1)
      logical iw(ncol)
c-----------------------------------------------------------------------
c amask builds a sparse matrix C from an input matrix A by keeping
c only the entries of A whose positions appear in the mask pattern
c jmask, imask.
c-----------------------------------------------------------------------
c On entry:
c---------
c nrow  = integer. row dimension of input matrix
c ncol  = integer. Column dimension of input matrix.
c
c a,
c ja,
c ia    = matrix in Compressed Sparse Row format
c
c jmask,
c imask = matrix defining mask (pattern only) stored in compressed
c         sparse row format.
c
c nzmax = length of arrays c and jc. see ierr.
c
c On return:
c-----------
c
c jmask, imask are unchanged; a, ja, ia are unchanged unless they
c are also passed as c, jc, ic (see note).
c
c c,
c jc,
c ic    = the output matrix in Compressed Sparse Row format. The
c         kept entries of a row appear in the order they have in A.
c
c ierr  = integer. serving as error message.
c         ierr = 0  means normal return
c         ierr .gt. 0 means that amask stopped when processing
c         row number ierr, because there was not enough space in
c         c, jc according to the value of nzmax. In that case
c         ic(nrow+1) has not been set.
c
c work arrays:
c-------------
c iw    = logical work array of length ncol.
c
c note:
c------ the algorithm is in place: c, jc, ic can be the same as
c a, ja, ia, in which case the result overwrites a, ja, ia.
c
c-----------------------------------------------------------------------
c local variables
      integer irow, kk, jcol, kbeg, kend, nzc
c
      ierr = 0
c     nzc = number of entries stored in c, jc so far
      nzc = 0
      do 10 jcol = 1, ncol
         iw(jcol) = .false.
 10   continue
c
      do 50 irow = 1, nrow
c
c        flag the columns allowed by the mask in this row
c
         do 20 kk = imask(irow), imask(irow+1)-1
            iw(jmask(kk)) = .true.
 20      continue
c
c        fetch the row bounds before ic(irow) is written, so that
c        ic may share storage with ia
c
         kbeg = ia(irow)
         kend = ia(irow+1) - 1
         ic(irow) = nzc + 1
         do 30 kk = kbeg, kend
            jcol = ja(kk)
            if (iw(jcol)) then
c              no room left for one more entry: report the row
               if (nzc .ge. nzmax) then
                  ierr = irow
                  return
               endif
               nzc = nzc + 1
               jc(nzc) = jcol
               c(nzc) = a(kk)
            endif
 30      continue
c
c        clear the flags again for the next row
c
         do 40 kk = imask(irow), imask(irow+1)-1
            iw(jmask(kk)) = .false.
 40      continue
 50   continue
      ic(nrow+1) = nzc + 1
c
      return
c-----end-of-amask -----------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine rperm (nrow,a,ja,ia,ao,jao,iao,perm,job)
      integer nrow,ja(*),ia(nrow+1),jao(*),iao(nrow+1),perm(nrow),job
      real*8 a(*),ao(*)
c-----------------------------------------------------------------------
c rperm permutes the rows of a matrix in CSR format, i.e. it
c computes B = P A  where P is a permutation matrix.
c P is defined through the array perm: for each j, perm(j) is the
c destination row number of row number j.
c Youcef Saad -- recoded Jan 28, 1991.
c-----------------------------------------------------------------------
c on entry:
c----------
c nrow  = row dimension of the matrix
c a, ja, ia = input matrix in csr format
c perm  = integer array of length nrow containing the permutation
c         for the rows: perm(i) is the destination of row i in the
c         permuted matrix.
c         ---> a(i,j) in the original matrix becomes a(perm(i),j)
c         in the output  matrix.
c
c job   = integer indicating the work to be done:
c               job = 1 permute a, ja, ia into ao, jao, iao
c                       (the real values are copied into ao).
c               job .ne. 1 :  ignore real values; only jao and iao
c                       are built, and the arrays a and ao are
c                       neither needed nor used.
c
c------------
c on return:
c------------
c ao, jao, iao = permuted matrix in csr format. The entries of
c                each row keep the order they had in the input.
c note :
c        the output is built in separate arrays: rows are written
c        to jao/ao while other rows are still to be read from
c        ja/a.
c----------------------------------------------------------------------c
c           Y. Saad, May  2, 1990                                      c
c----------------------------------------------------------------------c
c local variables
      logical docopy
      integer irow, inew, kk, kdst
c
      docopy = (job .eq. 1)
c
c     length of each output row, stored one slot ahead
c
      do 10 irow = 1, nrow
         iao(perm(irow)+1) = ia(irow+1) - ia(irow)
 10   continue
c
c     running sum turns the lengths into row pointers
c
      iao(1) = 1
      do 20 inew = 2, nrow+1
         iao(inew) = iao(inew) + iao(inew-1)
 20   continue
c
c     copy row irow of the input into row perm(irow) of the output
c
      do 40 irow = 1, nrow
         kdst = iao(perm(irow))
         do 30 kk = ia(irow), ia(irow+1)-1
            jao(kdst) = ja(kk)
            if (docopy) ao(kdst) = a(kk)
            kdst = kdst + 1
 30      continue
 40   continue
c
      return
c---------end-of-rperm -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine cperm (nrow,a,ja,ia,ao,jao,iao,perm,job)
      integer nrow,ja(*),ia(nrow+1),jao(*),iao(nrow+1),perm(*), job
      real*8 a(*), ao(*)
c-----------------------------------------------------------------------
c cperm permutes the columns of a matrix a, ja, ia and writes the
c result in the output matrix ao, jao, iao.
c cperm computes B = A P, where  P is a permutation matrix
c that maps column j into column perm(j), i.e., on return
c      a(i,j) becomes a(i,perm(j)) in new matrix
c Y. Saad, May 2, 1990 / modified Jan. 28, 1991.
c-----------------------------------------------------------------------
c on entry:
c----------
c nrow  = row dimension of the matrix
c
c a, ja, ia = input matrix in csr format.
c
c perm  = integer array of length ncol (number of columns of A)
c         containing the permutation array for the columns:
c         a(i,j) in the original matrix becomes a(i,perm(j))
c         in the output matrix.
c
c job   = integer indicating the work to be done:
c               job = 1 permute a, ja, ia into ao, jao, iao
c                       (the real values are copied into ao and
c                       the pointers into iao).
c               job .ne. 1 :  only jao is computed; the real
c                       values and iao are ignored.
c
c------------
c on return:
c------------
c ao, jao, iao = permuted matrix in csr format
c
c Notes:
c-------
c 1. if job .ne. 1 then a, ao and iao are not used.
c 2. This routine is in place: ja, jao can be the same.
c 3. The entries are not reordered, so a matrix that is sorted by
c    increasing column number on entry may not be sorted on return.
c
c----------------------------------------------------------------------c
c local variables
      integer kk, irow, nnz
c
c     number of stored entries, read before iao is written
c
      nnz = ia(nrow+1) - 1
c
c     relabel every column index
c
      do 10 kk = 1, nnz
         jao(kk) = perm(ja(kk))
 10   continue
c
c     the pointers and the values are copied on request only
c
      if (job .eq. 1) then
         do 20 irow = 1, nrow+1
            iao(irow) = ia(irow)
 20      continue
         do 30 kk = 1, nnz
            ao(kk) = a(kk)
 30      continue
      endif
c
      return
c---------end-of-cperm--------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine dperm (nrow,a,ja,ia,ao,jao,iao,perm,qperm,job)
      integer nrow,ja(*),ia(nrow+1),jao(*),iao(nrow+1),perm(nrow),
     +        qperm(*),job
      real*8 a(*),ao(*)
c-----------------------------------------------------------------------
c dperm permutes the rows and columns of a matrix stored in CSR
c format, i.e., it computes P A Q, where P, Q are permutation
c matrices. P maps row i into row perm(i) and Q maps column j into
c column qperm(j):
c      a(i,j)    becomes   a(perm(i),qperm(j)) in new matrix
c In the particular case where Q is the transpose of P (symmetric
c permutation of A) qperm is not needed: perm is used for the
c columns too.
c note that qperm should be of length ncol (number of columns) but
c this is not checked.
c-----------------------------------------------------------------------
c Y. Saad, Sep. 21 1989 / recoded Jan. 28 1991.
c-----------------------------------------------------------------------
c on entry:
c----------
c nrow  = row dimension of the matrix
c a, ja,
c    ia = input matrix in a, ja, ia format
c perm  = integer array of length nrow containing the permutation
c         for the rows: perm(i) is the destination of row i in the
c         permuted matrix -- also the destination of column i in
c         case the permutation is symmetric (job .le. 2)
c
c qperm = same thing for the columns. It is referenced only when
c         job .gt. 2 (job=3 or job=4), i.e., for a nonsymmetric
c         permutation of rows and columns. Otherwise qperm is a
c         dummy argument.
c
c job   = integer indicating the work to be done:
c * job = 1,2 permutation is symmetric  Ao :== P * A * transp(P)
c               job = 1 permute a, ja, ia into ao, jao, iao
c               job = 2 permute matrix ignoring real values.
c * job = 3,4 permutation is non-symmetric  Ao :== P * A * Q
c               job = 3 permute a, ja, ia into ao, jao, iao
c               job = 4 permute matrix ignoring real values.
c
c on return:
c-----------
c ao, jao, iao = permuted matrix in a, ja, ia format
c
c in case job .eq. 2 or job .eq. 4, a and ao are never referred to
c and can be dummy arguments.
c Notes:
c-------
c  1) the work is done by rperm (rows, from a,ja,ia into
c     ao,jao,iao) followed by cperm applied to jao in place.
c  2) column indices may not be sorted on return even though they
c     may be on entry.
c----------------------------------------------------------------------c
c local variables
      integer ivals, ipatt
      intrinsic mod
c
c     ivals is 1 exactly when job is odd: only then does rperm copy
c     the real values. ipatt makes cperm relabel jao and nothing
c     else.
c
      ivals = mod(job,2)
      ipatt = 0
c
c     rows first
c
      call rperm (nrow,a,ja,ia,ao,jao,iao,perm,ivals)
c
c     then the columns, with qperm in the nonsymmetric case and
c     with perm otherwise
c
      if (job .gt. 2) then
         call cperm (nrow,ao,jao,iao,ao,jao,iao,qperm,ipatt)
      else
         call cperm (nrow,ao,jao,iao,ao,jao,iao,perm,ipatt)
      endif
c
      return
c-------end-of-dperm----------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine dperm1 (i1,i2,a,ja,ia,b,jb,ib,perm,ipos,job)
      integer i1,i2,job,ja(*),ia(*),jb(*),ib(*),perm(*)
      real*8 a(*),b(*)
c-----------------------------------------------------------------------
c     general submatrix extraction routine.
c-----------------------------------------------------------------------
c     extracts rows perm(i1), perm(i1+1), ..., perm(i2) (in this
c     order) from a matrix, leaving the column indices as they are.
c     The resulting submatrix is constructed in b, jb, ib. The
c     nonzero elements are accumulated starting in position ipos
c     of b, jb.
c-----------------------------------------------------------------------
c Y. Saad,Sep. 21 1989 / recoded Jan. 28 1991 / modified for PSPARSLIB
c Sept. 1997..
c-----------------------------------------------------------------------
c on entry:
c----------
c i1,i2 = range of positions in perm that select the rows.
c a,ja,
c   ia  = input matrix in CSR format
c perm  = integer array containing the indices of the rows to be
c         extracted.
c ipos  = integer. position in b, jb where the first extracted
c         element is stored.
c
c job   = job indicator. if (job .ne.1) values are not copied (i.e.,
c         only pattern is copied).
c
c on return:
c-----------
c b,jb,
c ib   = matrix in csr format. b(ipos:ipos+nnz-1),jb(ipos:ipos+nnz-1)
c     contain the value and column indices respectively of the nnz
c     nonzero elements of the extracted matrix. ib(1)=ipos and
c     ib(i-i1+2) points just past the row taken from perm(i).
c
c Notes:
c-------
c  algorithm is NOT in place
c-----------------------------------------------------------------------
c local variables
c
      integer kdst, isrc, kk, iout, i
      logical docopy
c-----------------------------------------------------------------------
      docopy = (job .eq. 1)
c     kdst = next free position in b, jb
      kdst = ipos
c     iout = index of the last pointer written in ib
      iout = 1
      ib(iout) = kdst
      do 20 i = i1, i2
         isrc = perm(i)
         do 10 kk = ia(isrc), ia(isrc+1)-1
            if (docopy) b(kdst) = a(kk)
            jb(kdst) = ja(kk)
            kdst = kdst + 1
 10      continue
         iout = iout + 1
         ib(iout) = kdst
 20   continue
      return
c--------end-of-dperm1--------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine dperm2 (i1,i2,a,ja,ia,b,jb,ib,cperm,rperm,istart,
     *        ipos,job)
      integer i1,i2,job,istart,ja(*),ia(*),jb(*),ib(*),cperm(*),rperm(*)
      real*8 a(*),b(*)
c-----------------------------------------------------------------------
c     general submatrix permutation/ extraction routine.
c-----------------------------------------------------------------------
c     extracts rows rperm(i1), rperm(i1+1), ..., rperm(i2) and
c     relabels their column indices through the array cperm. The
c     resulting submatrix is constructed in b, jb, ib. For added
c     flexibility, the extracted rows are put in sequence starting
c     from row 'istart' of B, and the nonzero elements are
c     accumulated starting in position ipos of b, jb. In most
c     applications istart and ipos are equal to one.
c     EXPLE (job is the trailing argument of every call):
c            (1) to permute msr to msr (excluding diagonals)
c     call dperm2 (1,n,a,ja,ja,b,jb,jb,rperm,rperm,1,n+2,job)
c            (2) To extract rows 1 to 10: define rperm and cperm to
c     be identity permutations (rperm(i)=i, i=1,n) and then
c     call dperm2 (1,10,a,ja,ia,b,jb,ib,rperm,rperm,1,1,job)
c            (3) to achieve a symmetric permutation as defined by
c     perm:
c     call dperm2 (1,10,a,ja,ia,b,jb,ib,perm,perm,1,1,job)
c            (4) to get a symmetric permutation of A and append the
c     resulting data structure to A's data structure:
c     call dperm2 (1,10,a,ja,ia,a,ja,ia(n+1),perm,perm,1,ia(n+1),
c    *             job)
c-----------------------------------------------------------------------
c Y. Saad,Sep. 21 1989 / recoded Jan. 28 1991.
c-----------------------------------------------------------------------
c on entry:
c----------
c i1,i2 = extract rows rperm(i1) to rperm(i2) of A, with i1<i2.
c
c a,ja,
c   ia  = input matrix in CSR format
c cperm = integer array containing the permutation for the
c         columns: cperm(j) is the destination of column j,
c         i.e., any column index ja(k) is transformed into
c         cperm(ja(k))
c
c rperm =  permutation array for the rows. rperm(i) = origin (in A)
c          of the row taken at step i. This is the reverse of the
c          convention used for perm in routines rperm, dperm,....
c          rows rperm(i1), rperm(i1+1), ... rperm(i2) are
c          extracted from A and stacked into B, starting in row
c          istart of B.
c istart= starting row for B where extracted matrix is to be added.
c         it is the first position of ib that is written.
c ipos  = beginning position in arrays b and jb where to start
c         copying elements. Thus, ib(istart) = ipos.
c
c job   = job indicator. if (job .ne.1) values are not copied (i.e.,
c         only pattern is copied).
c
c on return:
c-----------
c b,jb,
c ib   = matrix in csr format. positions 1,2,...,istart-1 of ib
c     are not touched. b(ipos:ipos+nnz-1),jb(ipos:ipos+nnz-1)
c     contain the value and column indices respectively of the nnz
c     nonzero elements of the permuted matrix. thus ib(istart)=ipos.
c
c Notes:
c-------
c  1) algorithm is NOT in place
c  2) column indices may not be sorted on return even  though they
c     may be on entry.
c-----------------------------------------------------------------------
c local variables
c
      integer kdst, isrc, kk, iout, i
      logical docopy
c-----------------------------------------------------------------------
      docopy = (job .eq. 1)
c     kdst = next free position in b, jb
      kdst = ipos
c     iout = index of the last pointer written in ib
      iout = istart
      ib(iout) = kdst
      do 20 i = i1, i2
         isrc = rperm(i)
         do 10 kk = ia(isrc), ia(isrc+1)-1
            if (docopy) b(kdst) = a(kk)
            jb(kdst) = cperm(ja(kk))
            kdst = kdst + 1
 10      continue
         iout = iout + 1
         ib(iout) = kdst
 20   continue
      return
c--------end-of-dperm2--------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine dmperm (nrow,a,ja,ao,jao,perm,job)
      integer nrow,ja(*),jao(*),perm(nrow),job
      real*8 a(*),ao(*)
c-----------------------------------------------------------------------
c This routine performs a symmetric permutation of the rows and
c columns of a matrix stored in MSR format. i.e., it computes
c B = P A transp(P), where P, is  a permutation matrix.
c P maps row i into row perm(i) and column j into column perm(j):
c      a(i,j)    becomes   a(perm(i),perm(j)) in new matrix
c (i.e.  ao(perm(i),perm(j)) = a(i,j) )
c calls dperm.
c-----------------------------------------------------------------------
c Y. Saad, Nov 15, 1991.
c-----------------------------------------------------------------------
c on entry:
c----------
c nrow  = dimension of the matrix
c a, ja = input matrix in MSR format.
c perm  = integer array of length nrow containing the permutation
c         for the rows: perm(i) is the destination of row i in the
c         permuted matrix -- and also the destination of column i.
c
c job   = integer indicating the work to be done:
c               job = 1 permute a, ja into ao, jao
c               job = 2 permute matrix ignoring real values.
c         job is handed to dperm unchanged; real values are moved
c         exactly when job is odd, which is the rule dperm applies.
c
c on return:
c-----------
c ao, jao = output matrix in MSR.
c
c in case job .eq. 2 a and ao are never referred to and can be dummy
c arguments. (the leading nrow entries of a used to be copied into
c ao whatever the value of job, which contradicted this statement;
c that copy is now done only when real values are requested.)
c
c Notes:
c-------
c  1) algorithm is NOT in place
c  2) column indices may not be sorted on return even  though they
c     may be on entry.
c----------------------------------------------------------------------c
c     local variables
c
      integer n1, n2, j
      logical values
      intrinsic mod
c
c     n2 is the first position of a, ja past the nrow+1 leading
c     entries.
c
      n1 = nrow+1
      n2 = n1+1
      values = (mod(job,2) .eq. 1)
c
c     permute the part addressed through the pointers ja(1:nrow+1).
c     the output pointers are built in jao(1:nrow+1) and count from
c     position n2 of ao, jao.
c
      call dperm (nrow,a,ja,ja,ao(n2),jao(n2),jao,perm,perm,job)
c
c     make the pointers count from the start of ao, jao, and move
c     the leading entries a(1:nrow) to their permuted positions
c     when real values are wanted.
c
      jao(1) = n2
      do 101 j=1, nrow
         if (values) ao(perm(j)) = a(j)
         jao(j+1) = jao(j+1)+n1
 101  continue
c
c done
c
      return
c-----------------------------------------------------------------------
c--------end-of-dmperm--------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine dvperm (n, x, perm)
      integer n, perm(n)
      real*8 x(n)
c-----------------------------------------------------------------------
c this subroutine performs an in-place permutation of a real vector x
c according to the permutation array perm(*), i.e., on return,
c the vector x satisfies,
c
c       x(perm(j)) :== x(j), j=1,2,.., n
c
c-----------------------------------------------------------------------
c on entry:
c---------
c n     = length of vector x. nothing is done when n .le. 0.
c perm  = integer array of length n containing the permutation
c         array. Its entries must be positive: their sign is used
c         as a marker while the routine runs.
c x     = input vector
c
c on return:
c----------
c x     = vector x permuted according to x(perm(*)) :=  x(*)
c perm  = same contents as on entry.
c
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
c local variables
      real*8 tmp, tmp1
      integer init, ii, k, next, j
c
c     empty vector: return before x(1) and perm(1) are referenced
c
      if (n .le. 0) return
c
c     start the first cycle at position 1. tmp is the value in hand
c     and ii its destination. perm(init) is negated to mark that
c     the value of position init has been picked up.
c
      init      = 1
      tmp       = x(init)
      ii        = perm(init)
      perm(init)= -perm(init)
      k         = 0
c
c loop
c
 6    k = k+1
c
c save the chased element --
c
      tmp1      = x(ii)
      x(ii)     = tmp
      next      = perm(ii)
c     a negative entry means the cycle has come back to its start
      if (next .lt. 0 ) goto 65
c
c test for end
c
      if (k .gt. n) goto 101
      tmp       = tmp1
      perm(ii)  = - perm(ii)
      ii        = next
c
c end loop
c
      goto 6
c
c reinitialize cycle -- look for the next position not yet moved
c
 65   init      = init+1
      if (init .gt. n) goto 101
      if (perm(init) .lt. 0) goto 65
      tmp       = x(init)
      ii        = perm(init)
      perm(init)=-perm(init)
      goto 6
c
c     flip the signs back
c
 101  continue
      do 200 j=1, n
         perm(j) = -perm(j)
 200  continue
c
      return
c-------------------end-of-dvperm---------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine ivperm (n, ix, perm)
      integer n, perm(n), ix(n)
c-----------------------------------------------------------------------
c this subroutine performs an in-place permutation of an integer vector
c ix according to the permutation array perm(*), i.e., on return,
c the vector ix satisfies,
c
c       ix(perm(j)) :== ix(j), j=1,2,.., n
c
c-----------------------------------------------------------------------
c on entry:
c---------
c n     = length of vector ix. nothing is done when n .le. 0.
c perm  = integer array of length n containing the permutation
c         array. Its entries must be positive: their sign is used
c         as a marker while the routine runs.
c ix    = input vector
c
c on return:
c----------
c ix    = vector ix permuted according to ix(perm(*)) :=  ix(*)
c perm  = same contents as on entry.
c
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
c local variables
      integer tmp, tmp1
      integer init, ii, k, next, j
c
c     empty vector: return before ix(1) and perm(1) are referenced
c
      if (n .le. 0) return
c
c     start the first cycle at position 1. tmp is the value in hand
c     and ii its destination. perm(init) is negated to mark that
c     the value of position init has been picked up.
c
      init      = 1
      tmp       = ix(init)
      ii        = perm(init)
      perm(init)= -perm(init)
      k         = 0
c
c loop
c
 6    k = k+1
c
c save the chased element --
c
      tmp1      = ix(ii)
      ix(ii)    = tmp
      next      = perm(ii)
c     a negative entry means the cycle has come back to its start
      if (next .lt. 0 ) goto 65
c
c test for end
c
      if (k .gt. n) goto 101
      tmp       = tmp1
      perm(ii)  = - perm(ii)
      ii        = next
c
c end loop
c
      goto 6
c
c reinitialize cycle -- look for the next position not yet moved
c
 65   init      = init+1
      if (init .gt. n) goto 101
      if (perm(init) .lt. 0) goto 65
      tmp       = ix(init)
      ii        = perm(init)
      perm(init)=-perm(init)
      goto 6
c
c     flip the signs back
c
 101  continue
      do 200 j=1, n
         perm(j) = -perm(j)
 200  continue
c
      return
c-------------------end-of-ivperm---------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine retmx (n,a,ja,ia,dd)
      real*8 a(*),dd(*)
      integer n,ia(*),ja(*)
c-----------------------------------------------------------------------
c returns in dd(*) the max absolute value of elements in row *,
c carrying the sign of the diagonal element of that row.
c used for scaling purposes. superseded by rnrms  .
c
c on entry:
c n     = dimension of A
c a,ja,ia
c       = matrix stored in compressed sparse row format
c
c on return:
c dd    = real*8 array of length n. entry dd(i) contains the largest
c         absolute value found in row i, multiplied by -1 when the
c         stored diagonal element a(i,i) is negative. rows with no
c         stored diagonal element get a positive sign; empty rows
c         yield dd(i) = 0.
c
c notes:
c   row i is scanned over a(ia(i)) .. a(ia(i+1)-1) only. an earlier
c   version started each scan at the last entry of the previous row,
c   re-used the sign factor of an earlier row when no diagonal was
c   present, and multiplied by |a(i,i)| instead of its sign.
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
c local variables
      integer i, k
      real*8 t, t1, t2
c
      do 11 i=1,n
c     t  = running max of |a(i,j)|, t2 = sign factor (+1 or -1)
         t = 0.0d0
         t2 = 1.0d0
         do 101  k=ia(i),ia(i+1)-1
            t1 = abs(a(k))
            if (t1 .gt. t) t = t1
            if (ja(k) .eq. i) then
               if (a(k) .lt. 0.0d0) then
                  t2 = -1.0d0
               else
                  t2 = 1.0d0
               endif
            endif
 101     continue
         dd(i) =  t2*t
c     we do not invert diag
 11   continue
      return
c---------end of retmx -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine diapos  (n,ja,ia,idiag)
      integer ia(n+1), ja(*), idiag(n)
c-----------------------------------------------------------------------
c locates the diagonal entries of a sparse matrix given by its csr
c structure ja, ia and records their positions in idiag.
c-----------------------------------------------------------------------
c on entry:
c----------
c n     = integer. row dimension of the matrix.
c ja,ia = column indices and row pointers of the matrix in compressed
c         sparse row format. the value array is not needed.
c
c on return:
c-----------
c idiag = integer array of length n. idiag(i) is the position k in
c         ja (and hence in the value array) with ja(k) = i inside
c         row i, or 0 when row i stores no diagonal entry. if a row
c         stores several entries with column i, the last one wins.
c----------------------------------------------------------------------c
c           Y. Saad, March, 1990
c----------------------------------------------------------------------c
      integer irow, kpos
c
      do 20 irow = 1, n
c     default: no diagonal entry in this row
         idiag(irow) = 0
         do 10 kpos = ia(irow), ia(irow+1)-1
            if (ja(kpos) .ne. irow) goto 10
            idiag(irow) = kpos
 10      continue
 20   continue
c----------- -end-of-diapos---------------------------------------------
c-----------------------------------------------------------------------
      return
      end
c-----------------------------------------------------------------------
      subroutine dscaldg (n,a,ja,ia,diag,job)
      real*8 a(*), diag(*),t
      integer ia(*),ja(*)
c-----------------------------------------------------------------------
c scales the rows of a csr matrix in place: row i of a is multiplied
c by the reciprocal of a per-row factor.
c
c on entry:
c n     = number of rows processed.
c a,ja,ia
c       = matrix in compressed sparse row format. a is overwritten.
c job   = selects where the per-row factors come from:
c         job = 0 : diag(*) is supplied by the caller.
c         job = 1 : diag(*) is obtained from a call to retmx.
c         any other value (in particular job = 2): diag(i) is set
c         to the 2-norm of row i.
c
c on return:
c diag  = the multipliers actually applied, i.e. the reciprocal of
c         the factor described above, or one where that factor was
c         zero (such rows are left unscaled).
c a     = the scaled matrix.
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
      integer irow, kpos
c
c     obtain the per-row factors (nothing to do when job = 0)
c
      if (job .eq. 1) then
         call retmx (n,a,ja,ia,diag)
      else if (job .ne. 0) then
         do 20 irow = 1, n
            t = 0.0d0
            do 10 kpos = ia(irow), ia(irow+1)-1
               t = t + a(kpos)*a(kpos)
 10         continue
            diag(irow) = sqrt(t)
 20      continue
      endif
c
c     invert the factors (zero is replaced by one) and scale each row
c
      do 40 irow = 1, n
         if (diag(irow) .eq. 0.0d0) then
            diag(irow) = 1.0d0
         else
            diag(irow) = 1.0d0/diag(irow)
         endif
         t = diag(irow)
         do 30 kpos = ia(irow), ia(irow+1)-1
            a(kpos) = a(kpos)*t
 30      continue
 40   continue
      return
c--------end of dscaldg -----------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine extbdg (n,a,ja,ia,bdiag,nblk,ao,jao,iao)
      implicit real*8 (a-h,o-z)
      real*8 bdiag(*),a(*),ao(*)
      integer ia(*),ja(*),jao(*),iao(*)
c-----------------------------------------------------------------------
c this subroutine walks a matrix stored in compressed sparse row
c format block-row by block-row (blocks of nblk consecutive rows) and
c splits the entries lying to the left of the main diagonal:
c   - entries inside the diagonal block of their row (first column
c     of the block .le. column .lt. row) are copied into bdiag;
c   - entries to the left of the diagonal block are copied into
c     ao,jao,iao.
c entries with column .ge. row are not copied anywhere.
c-----------------------------------------------------------------------
c on entry:
c----------
c n     = integer. The row dimension of the matrix a.
c a,
c ja,
c ia    = matrix stored in csr format
c nblk  = dimension of each diagonal block. must be nonzero (it is
c         used as a divisor).
c
c on return:
c----------
c bdiag = real*8 array. with m = 1+(n-1)/nblk block rows, the first
c         m*((nblk-1)*nblk)/2 entries are zeroed and then filled.
c         the strictly lower part of each diagonal block is stored
c         rowwise: the slots of a row follow those of the previous
c         row, and the row at offset r inside its block owns r slots.
c ao,
c jao,
c iao   = entries located to the left of the diagonal blocks, stored
c         in csr format (iao(j+1)-iao(j) entries for row j).
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
      integer nbrow, ntri, iblk, ifirst, ilast, irow, kpos, jcol
      integer nrem, ioff
c
      nbrow = 1 + (n-1)/nblk
      ntri  = (nblk*(nblk-1))/2
      do 10 kpos = 1, nbrow*ntri
         bdiag(kpos) = 0.0d0
 10   continue
c     nrem = entries written to ao so far
c     ioff = first bdiag slot belonging to the current row
      nrem = 0
      ioff = 1
      iao(1) = 1
c-------------------------
      do 40 iblk = 1, nbrow
         ifirst = (iblk-1)*nblk + 1
         ilast  = min0 (n, ifirst+nblk-1)
         do 30 irow = ifirst, ilast
            do 20 kpos = ia(irow), ia(irow+1)-1
               jcol = ja(kpos)
c     diagonal and upper entries are ignored
               if (jcol .ge. irow) goto 20
               if (jcol .ge. ifirst) then
                  bdiag(ioff+jcol-ifirst) = a(kpos)
               else
                  nrem = nrem + 1
                  ao(nrem) = a(kpos)
                  jao(nrem) = jcol
               endif
 20         continue
            ioff = ioff + irow - ifirst
            iao(irow+1) = nrem + 1
 30      continue
 40   continue
      return
c---------end-of-extbdg-------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine getbwd(n,a,ja,ia,ml,mu)
c-----------------------------------------------------------------------
c computes the lower and upper bandwidths of a csr matrix.
c the column indices inside a row need not be sorted.
c-----------------------------------------------------------------------
c on entry:
c----------
c n     = integer = the row dimension of the matrix
c a, ja,
c    ia = matrix in compressed sparse row format. only the structure
c         (ja, ia) is examined; a is not referenced.
c
c on return:
c-----------
c ml    = integer. max of (i-j) over all stored entries (i,j)
c mu    = integer. max of (j-i) over all stored entries (i,j)
c
c Notes:
c ===== ml and mu are allowed to be negative on return. This may be
c       useful since it tells whether the band is confined to the
c       strict upper/lower triangular part. both are initialised to
c       -n, which is the value returned when no entry is stored.
c----------------------------------------------------------------------c
c Y. Saad, Sep. 21 1989                                                c
c----------------------------------------------------------------------c
      real*8 a(*)
      integer ja(*),ia(n+1),ml,mu
      integer irow, kpos, joff
c
      ml = -n
      mu = -n
      do 20 irow = 1, n
         do 10 kpos = ia(irow), ia(irow+1)-1
c     joff = signed distance of the entry from the main diagonal
            joff = ja(kpos) - irow
            if ( joff .gt. mu) mu =  joff
            if (-joff .gt. ml) ml = -joff
 10      continue
 20   continue
      return
c---------------end-of-getbwd ------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine blkfnd (nrow,ja,ia,nblk)
c-----------------------------------------------------------------------
c This routine attempts to determine whether or not  the input
c matrix has a block structure and finds the block size
c if it does. A block matrix is one which is
c comprised of small square dense blocks. If there are zero
c elements within the square blocks and the original data structure
c takes these zeros into account then blkchk may fail to find the
c correct block size.
c-----------------------------------------------------------------------
c on entry
c---------
c nrow  = integer equal to the row dimension of the matrix.
c ja    = integer array containing the column indices of the
c         nonzero entries of the matrix stored by row.
c ia    = integer array of length nrow + 1 containing the pointers
c         to the beginning of each row in array ja.
c
c on return
c----------
c nblk  = integer. the block size found. the input value is ignored.
c         nblk = 1 is returned when the shortest row has at most one
c         entry or when no larger candidate is accepted by blkchk.
c
c method
c-------
c candidates are the divisors of the length of the shortest row,
c tried from the largest down. a candidate iblk is first screened
c cheaply: the iblk rows starting at the (first) shortest row must
c all have the same length, first column and last column. candidates
c passing the screen are verified by blkchk.
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
      integer ia(nrow+1),ja(*)
      integer minlen, irow, i, lenrow, iblk, jrow, i1, i2
      integer jfirst, jlast, imsg
c-----------------------------------------------------------------------
c locate the first row of minimal length
c-----------------------------------------------------------------------
      minlen = ia(2)-ia(1)
      irow   = 1
      do 1 i=2,nrow
         lenrow = ia(i+1)-ia(i)
         if (lenrow .lt. minlen) then
            minlen = lenrow
            irow = i
         endif
 1    continue
c
c     ---- candidates are all dividers of minlen
c
      nblk = 1
      if (minlen .le. 1) return
c
c     reference values for the screen, taken from row irow (which is
c     non-empty since minlen .gt. 1)
c
      jfirst = ja(ia(irow))
      jlast  = ja(ia(irow+1)-1)
c
      do 99 iblk = minlen, 1, -1
         if (mod(minlen,iblk) .ne. 0) goto 99
c
c     a group of iblk rows starting at irow must fit in the matrix
c
         if (irow+iblk-1 .gt. nrow) goto 99
         do 10 jrow = irow+1,irow+iblk-1
            i1 = ia(jrow)
            i2 = ia(jrow+1)-1
            if (i2+1-i1 .ne. minlen .or. ja(i1) .ne. jfirst .or.
     *           ja(i2) .ne. jlast) goto 99
 10      continue
c
c     check for this candidate ----
c
         call blkchk (nrow,ja,ia,iblk,imsg)
         if (imsg .eq. 0) then
c
c     block size found
c
            nblk = iblk
            return
         endif
 99   continue
      return
c--------end-of-blkfnd -------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine blkchk (nrow,ja,ia,nblk,imsg)
c-----------------------------------------------------------------------
c This routine checks whether the input matrix is a block
c matrix with block size of nblk. A block matrix is one which is
c comprised of small square dense blocks. If there are zero
c elements within the square blocks and the data structure
c takes them into account then blkchk may fail to find the
c correct block size.
c-----------------------------------------------------------------------
c on entry
c---------
c nrow  = integer equal to the row dimension of the matrix.
c ja    = integer array containing the column indices of the
c         nonzero entries of the matrix stored by row.
c ia    = integer array of length nrow + 1 containing the pointers
c         to the beginning of each row in array ja.
c
c nblk  = integer containing the value of nblk to be checked.
c
c on return
c----------
c
c imsg  = integer containing a message  with the following meaning.
c          imsg = 0 means that nblk is a correct block size (always
c                   returned when nblk .le. 1). imsg .lt. 0 means
c                   nblk is not a correct block size.
c          imsg = -1 : nblk does not divide nrow
c          imsg = -2 : a starting element of a block is at a wrong
c             position (j .ne. mult*nblk +1 )
c          imsg = -3 : nblk does not divide the length of the first
c             row of a group
c          imsg = -4 : an element is isolated outside a block or
c             two rows in same group have different lengths
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
      integer ia(nrow+1),ja(*)
      integer ngrp, igrp, irow, ir, kstart, kcur, lenrow
      integer nbk, ibk, jbase, jc
c----------------------------------------------------------------------
c the rows are processed in groups of nblk consecutive rows. every
c row of a group must have the length of the group's first row and
c must consist of runs of nblk consecutive columns, each run
c starting where the corresponding run of the first row starts, on
c a column of the form mult*nblk+1.
c----------------------------------------------------------------------
      imsg = 0
      if (nblk .le. 1) return
      ngrp = nrow/nblk
      if (ngrp*nblk .ne. nrow) then
         imsg = -1
         return
      endif
c--   main loop ---------------------------------------------------------
      irow = 1
      do 40 igrp = 1, ngrp
c     kstart = position of the first entry of the group
c     kcur   = running position, swept over all entries of the group
         kstart = ia(irow)
         kcur   = kstart
c     lenrow = row length in this group, nbk = blocks per row
         lenrow = ia(irow+1) - kstart
         nbk    = lenrow/nblk
         if (nbk*nblk .ne. lenrow) then
            imsg = -3
            return
         endif
c
c     for each row of the group
c
         do 30 ir = 1, nblk
            irow = irow + 1
            if (ia(irow)-ia(irow-1) .ne. lenrow) then
               imsg = -4
               return
            endif
c
c     for each block of the row
c
            do 20 ibk = 0, nbk-1
               jbase = ja(kstart+nblk*ibk) - 1
               if (mod(jbase,nblk) .ne. 0) then
                  imsg = -2
                  return
               endif
c
c     for each column of the block
c
               do 10 jc = 1, nblk
                  if (ja(kcur) .ne. jbase+jc) then
                     imsg = -4
                     return
                  endif
                  kcur = kcur + 1
 10            continue
 20         continue
 30      continue
 40   continue
c     went through all loops successfully: imsg is still 0
c----------------end of blkchk -----------------------------------------
c-----------------------------------------------------------------------
      return
      end
c-----------------------------------------------------------------------
      subroutine infdia (n,ja,ia,ind,idiag)
      integer ia(*), ind(*), ja(*)
c-----------------------------------------------------------------------
c     obtains information on the diagonals of A.
c-----------------------------------------------------------------------
c counts the stored entries on each of the 2*n-1 diagonals of A and
c the number of diagonals holding at least one stored entry.
c-----------------------------------------------------------------------
c on entry:
c----------
c n     = dimension of the matrix a.
c
c ja,
c ia    = structure of the matrix stored in csr format (the value
c         array is not needed here).
c
c on return:
c-----------
c
c idiag = integer. number of diagonals with at least one entry.
c
c ind   = integer array of length at least 2*n-1. The k-th entry in
c         ind contains the number of stored elements in diagonal
c         number k, the numbering being from the lowermost diagonal
c         (bottom-left). In other words ind(k) = length of diagonal
c         whose offset wrt the main diagonal is = - n + k.
c----------------------------------------------------------------------c
c           Y. Saad, Sep. 21 1989                                      c
c----------------------------------------------------------------------c
      integer ndiag, irow, kpos, islot
c
      ndiag = 2*n - 1
      do 10 kpos = 1, ndiag
         ind(kpos) = 0
 10   continue
c
c     tally each entry; a diagonal is counted the first time it is hit
c
      idiag = 0
      do 30 irow = 1, n
         do 20 kpos = ia(irow), ia(irow+1)-1
            islot = n + ja(kpos) - irow
            if (ind(islot) .eq. 0) idiag = idiag + 1
            ind(islot) = ind(islot) + 1
 20      continue
 30   continue
      return
c done
c------end-of-infdia ---------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine amubdg (nrow,ncol,ncolb,ja,ia,jb,ib,ndegr,nnz,iw)
      integer ja(*),jb(*),ia(nrow+1),ib(ncol+1),ndegr(nrow),iw(ncolb)
c-----------------------------------------------------------------------
c gets the number of nonzero elements in each row of A*B and the total
c number of nonzero elements in A*B. only the structures of A and B
c are used (no numerical cancellation is detected).
c-----------------------------------------------------------------------
c on entry:
c --------
c
c nrow  = integer.  row dimension of matrix A
c ncol  = integer.  column dimension of matrix A = row dimension of
c                   matrix B.
c ncolb = integer. the column dimension of the matrix B.
c
c ja, ia= row structure of input matrix A: ja = column indices of
c         the nonzero elements of A stored by rows.
c         ia = pointer to beginning of each row  in ja.
c
c jb, ib= row structure of input matrix B: jb = column indices of
c         the nonzero elements of B stored by rows.
c         ib = pointer to beginning of each row  in jb.
c
c on return:
c ---------
c ndegr = integer array of length nrow containing the degrees (i.e.,
c         the number of nonzeros in  each row of the matrix A * B
c
c nnz   = total number of nonzero elements found in A * B
c
c work arrays:
c-------------
c iw    = integer work array of length ncolb. it is zero on return.
c-----------------------------------------------------------------------
      integer irow, ka, kb, krow, jcol, nfound, lhead
c
      nnz = 0
      do 10 jcol = 1, ncolb
         iw(jcol) = 0
 10   continue
c
c     method used: row irow of A*B is the union of the rows of B
c     selected by the column indices of row irow of A. the columns
c     already found are chained in iw: iw(j) .ne. 0 means column j
c     is in the list and holds the previously inserted column
c     (-1 for the first one); lhead is the last column inserted.
c
      do 50 irow = 1, nrow
         nfound = 0
         lhead = -1
         do 30 ka = ia(irow), ia(irow+1)-1
c
c     row of B to be merged in
c
            krow = ja(ka)
            do 20 kb = ib(krow), ib(krow+1)-1
               jcol = jb(kb)
               if (iw(jcol) .ne. 0) goto 20
c
c     new column: push it on the list
c
               nfound = nfound + 1
               iw(jcol) = lhead
               lhead = jcol
 20         continue
 30      continue
         ndegr(irow) = nfound
         nnz = nnz + nfound
c
c     walk the list backwards to reset iw to zero
c
         do 40 kb = 1, nfound
            jcol = lhead
            lhead = iw(jcol)
            iw(jcol) = 0
 40      continue
 50   continue
c
      return
c---------------end-of-amubdg ------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine aplbdg (nrow,ncol,ja,ia,jb,ib,ndegr,nnz,iw)
      integer ja(*),jb(*),ia(nrow+1),ib(nrow+1),iw(ncol),ndegr(nrow)
c-----------------------------------------------------------------------
c gets the number of nonzero elements in each row of A+B and the total
c number of nonzero elements in A+B. only the structures of A and B
c are used.
c-----------------------------------------------------------------------
c on entry:
c ---------
c nrow  = integer. The row dimension of A and B
c ncol  = integer. The column dimension of A and B.
c
c ja,
c ia    = structure of matrix A in compressed sparse row format.
c
c jb,
c ib    = structure of matrix B in compressed sparse row format.
c
c on return:
c----------
c ndegr = integer array of length nrow containing the degrees (i.e.,
c         the number of nonzeros in  each row of the matrix A + B.
c
c nnz   = total number of nonzero elements found in A + B
c
c work arrays:
c------------
c iw    = integer work array of length equal to ncol. it is zero on
c         return.
c
c notes:
c------
c every stored entry of a row of A is counted without checking for
c repeats, so a column index appearing twice in the same row of A
c is not supported.
c-----------------------------------------------------------------------
      integer irow, kpos, jcol, nfound, lhead
c
      nnz = 0
      do 10 jcol = 1, ncol
         iw(jcol) = 0
 10   continue
c
c     the columns of the current row are chained in iw: iw(j) .ne. 0
c     means column j is in the list and holds the previously inserted
c     column (-1 for the first one); lhead is the last one inserted.
c
      do 50 irow = 1, nrow
         nfound = 0
         lhead = -1
c
c     row of A: every entry goes on the list
c
         do 20 kpos = ia(irow), ia(irow+1)-1
            jcol = ja(kpos)
            nfound = nfound + 1
            iw(jcol) = lhead
            lhead = jcol
 20      continue
c
c     row of B: only the columns not seen yet go on the list
c
         do 30 kpos = ib(irow), ib(irow+1)-1
            jcol = jb(kpos)
            if (iw(jcol) .ne. 0) goto 30
            nfound = nfound + 1
            iw(jcol) = lhead
            lhead = jcol
 30      continue
c     done with row irow.
         ndegr(irow) = nfound
         nnz = nnz + nfound
c
c     walk the list backwards to reset iw to zero
c
         do 40 kpos = 1, nfound
            jcol = lhead
            lhead = iw(jcol)
            iw(jcol) = 0
 40      continue
 50   continue
      return
c----------------end-of-aplbdg -----------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine rnrms   (nrow, nrm, a, ja, ia, diag)
      real*8 a(*), diag(nrow), scal
      integer ja(*), ia(nrow+1)
c-----------------------------------------------------------------------
c computes a norm of every row of A (choice of three norms).
c-----------------------------------------------------------------------
c on entry:
c ---------
c nrow  = integer. The row dimension of A
c
c nrm   = integer. norm indicator. nrm = 1, means 1-norm, nrm =2
c                  means the 2-nrm, nrm = 0 means max norm.
c                  for any other value the sum of the squares of the
c                  row entries is returned (no square root is taken).
c
c a,
c ja,
c ia   = Matrix A in compressed sparse row format. ja is not
c        referenced.
c
c on return:
c----------
c
c diag = real vector of length nrow containing the norms (zero for
c        an empty row).
c
c-----------------------------------------------------------------
      integer irow, kpos
c
      do 20 irow = 1, nrow
c
c     accumulate over the stored entries of row irow
c
         scal = 0.0d0
         do 10 kpos = ia(irow), ia(irow+1)-1
            if (nrm .eq. 0) then
               scal = max(scal,abs(a(kpos) ) )
            else if (nrm .eq. 1) then
               scal = scal + abs(a(kpos) )
            else
               scal = scal+a(kpos)**2
            endif
 10      continue
         if (nrm .eq. 2) scal = sqrt(scal)
         diag(irow) = scal
 20   continue
      return
c-----------------------------------------------------------------------
c-------------end-of-rnrms----------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine cnrms   (nrow, nrm, a, ja, ia, diag)
      real*8 a(*), diag(nrow)
      integer ja(*), ia(nrow+1)
c-----------------------------------------------------------------------
c computes a norm of every column of A (choice of three norms).
c-----------------------------------------------------------------------
c on entry:
c ---------
c nrow  = integer. The row dimension of A
c
c nrm   = integer. norm indicator. nrm = 1, means 1-norm, nrm =2
c                  means the 2-nrm, nrm = 0 means max norm.
c                  for any other value the sum of the squares of the
c                  column entries is returned (no square root).
c
c a,
c ja,
c ia   = Matrix A in compressed sparse row format.
c
c on return:
c----------
c
c diag = real vector of length nrow containing the norms. diag is
c        addressed by column index, so every column index in ja
c        must lie in 1..nrow.
c
c-----------------------------------------------------------------
      integer irow, kpos, jcol
      real*8 absa
c
      do 10 jcol = 1, nrow
         diag(jcol) = 0.0d0
 10   continue
c
c     sweep the matrix by rows, updating the norm of the column
c     each entry belongs to
c
      do 30 irow = 1, nrow
         do 20 kpos = ia(irow), ia(irow+1)-1
            jcol = ja(kpos)
            absa = abs(a(kpos))
            if (nrm .eq. 0) then
               diag(jcol) = max(diag(jcol), absa)
            else if (nrm .eq. 1) then
               diag(jcol) = diag(jcol) + absa
            else
               diag(jcol) = diag(jcol)+a(kpos)**2
            endif
 20      continue
 30   continue
c
c     only the 2-norm needs a final square root
c
      if (nrm .eq. 2) then
         do 40 jcol = 1, nrow
            diag(jcol) = sqrt(diag(jcol))
 40      continue
      endif
      return
c-----------------------------------------------------------------------
c------------end-of-cnrms-----------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine roscal(nrow,job,nrm,a,ja,ia,diag,b,jb,ib,ierr)
      real*8 a(*), b(*), diag(nrow)
      integer nrow,job,nrm,ja(*),jb(*),ia(nrow+1),ib(nrow+1),ierr
c-----------------------------------------------------------------------
c scales the rows of A such that their norms are one on return
c 3 choices of norms: 1-norm, 2-norm, max-norm.
c-----------------------------------------------------------------------
c on entry:
c ---------
c nrow  = integer. The row dimension of A
c
c job   = integer. job indicator, passed on unchanged to diamua.
c         Job=0 means get array b only
c         job = 1 means get b, and the integer arrays ib, jb.
c
c nrm   = integer. norm indicator, passed on unchanged to rnrms.
c         nrm = 1, means 1-norm, nrm =2 means the 2-nrm,
c         nrm = 0 means max norm
c
c a,
c ja,
c ia   = Matrix A in compressed sparse row format.
c
c on return:
c----------
c
c diag = diagonal matrix stored as a vector containing the matrix
c        by which the rows have been scaled, i.e., on return
c        we have B = Diag*A. diag(i) is the reciprocal of the norm
c        of row i.
c
c b,
c jb,
c ib    = resulting matrix B in compressed sparse row sparse format.
c
c ierr  = error message. ierr=0     : Normal return
c                        ierr=i > 0 : Row number i is a zero row.
c         in that case B is not computed; diag holds reciprocals
c         for the rows before i and plain norms from row i on.
c Notes:
c-------
c 1)        The column dimension of A is not needed.
c 2)        algorithm in place (B can take the place of A).
c-----------------------------------------------------------------
      integer irow
c
      ierr = 0
      call rnrms (nrow,nrm,a,ja,ia,diag)
c
c     turn the norms into scaling factors; bail out on a zero norm
c
      do 10 irow = 1, nrow
         if (diag(irow) .eq. 0.0d0) goto 900
         diag(irow) = 1.0d0/diag(irow)
 10   continue
      call diamua(nrow,job,a,ja,ia,diag,b,jb,ib)
      return
c
c     zero row encountered
c
 900  ierr = irow
      return
c-------end-of-roscal---------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine coscal(nrow,job,nrm,a,ja,ia,diag,b,jb,ib,ierr)
c-----------------------------------------------------------------------
      real*8 a(*),b(*),diag(nrow)
      integer nrow,job,ja(*),jb(*),ia(nrow+1),ib(nrow+1),ierr
c-----------------------------------------------------------------------
c scales the columns of A such that their norms are one on return
c result matrix written on b, or overwritten on A.
c 3 choices of norms: 1-norm, 2-norm, max-norm. in place.
c-----------------------------------------------------------------------
c on entry:
c ---------
c nrow  = integer. The row dimension of A
c
c job   = integer. job indicator, passed on unchanged to amudia.
c         Job=0 means get array b only
c         job = 1 means get b, and the integer arrays ib, jb.
c
c nrm   = integer. norm indicator, passed on unchanged to cnrms.
c         nrm = 1, means 1-norm, nrm =2 means the 2-nrm,
c         nrm = 0 means max norm
c
c a,
c ja,
c ia   = Matrix A in compressed sparse row format.
c
c on return:
c----------
c
c diag = diagonal matrix stored as a vector containing the matrix
c        by which the columns have been scaled, i.e., on return
c        we have B = A * Diag. diag(j) is the reciprocal of the
c        norm of column j. only the first nrow entries are
c        examined.
c
c b,
c jb,
c ib    = resulting matrix B in compressed sparse row sparse format.
c
c ierr  = error message. ierr=0     : Normal return
c                        ierr=i > 0 : Column number i is a zero
c                                     column.
c         in that case B is not computed; diag holds reciprocals
c         for the columns before i and plain norms from i on.
c Notes:
c-------
c 1)     The column dimension of A is not needed.
c 2)     algorithm in place (B can take the place of A).
c-----------------------------------------------------------------
      integer jcol
c
      ierr = 0
      call cnrms (nrow,nrm,a,ja,ia,diag)
c
c     turn the norms into scaling factors; bail out on a zero norm
c
      do 10 jcol = 1, nrow
         if (diag(jcol) .eq. 0.0d0) goto 900
         diag(jcol) = 1.0d0/diag(jcol)
 10   continue
      call amudia (nrow,job,a,ja,ia,diag,b,jb,ib)
      return
c
c     zero column encountered
c
 900  ierr = jcol
      return
c--------end-of-coscal--------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine addblk(nrowa, ncola, a, ja, ia, ipos, jpos, job,
     & nrowb, ncolb, b, jb, ib, nrowc, ncolc, c, jc, ic, nzmx, ierr)
c      implicit none
      integer nrowa, nrowb, nrowc, ncola, ncolb, ncolc, ipos, jpos
      integer nzmx, ierr, job
      integer ja(1:*), ia(1:*), jb(1:*), ib(1:*), jc(1:*), ic(1:*)
      real*8 a(1:*), b(1:*), c(1:*)
c-----------------------------------------------------------------------
c     This subroutine adds a matrix B into a submatrix of A whose
c     (1,1) element is located in the starting position (ipos, jpos).
c     The resulting matrix is allowed to be larger than A (and B),
c     and the resulting dimensions nrowc, ncolc will be redefined
c     accordingly upon return.
c     The input matrices are assumed to be sorted, i.e. in each row
c     the column indices appear in ascending order in the CSR format.
c-----------------------------------------------------------------------
c on entry:
c ---------
c nrowa    = number of rows in A.
c ncola    = number of columns in A.
c a,ja,ia  = Matrix A in compressed sparse row format with entries sorted
c ipos,
c jpos     = row and column of A onto which element (1,1) of B is
c            added: row k of B goes to row ipos+k-1 and column j of
c            B to column jpos+j-1.
c job      = integer. job = 0 : only the structure jc, ic is built.
c            job .ne. 0 : the values c are computed as well.
c nrowb    = number of rows in B.
c ncolb    = number of columns in B.
c b,jb,ib  = Matrix B in compressed sparse row format with entries sorted
c
c nzmx     = integer. The  length of the arrays c and jc. addblk will
c            stop if the number of nonzero elements in the matrix C
c            reaches nzmx. See ierr.
c
c on return:
c----------
c nrowc    = number of rows in C    = max(nrowa, nrowb+ipos-1).
c ncolc    = number of columns in C = max(ncola, ncolb+jpos-1).
c c,jc,ic  = resulting matrix C in compressed sparse row sparse format
c            with entries sorted ascendly in each row.
c
c ierr     = integer. serving as error message.
c         ierr = 0 means normal return,
c         ierr .gt. 0 means that addblk stopped while computing the
c         i-th row  of C with i=ierr. the test is made after each
c         entry is stored, so the routine stops as soon as nzmx
c         entries are held in C; nzmx must exceed the number of
c         entries of C for a normal return.
c
c Notes:
c-------
c     this will not work if any of the two input matrices is not sorted
c-----------------------------------------------------------------------
      logical values
      integer i,j1,j2,ka,kb,kc,kamax,kbmax,ibrow
      values = (job .ne. 0)
      ierr = 0
      nrowc = max(nrowa, nrowb+ipos-1)
      ncolc = max(ncola, ncolb+jpos-1)
      kc = 1
      ic(1) = kc
c
      do 10 i=1, nrowc
c
c     range ka..kamax of the entries of A in row i (empty range when
c     row i lies outside A)
c
         if (i.le.nrowa) then
            ka = ia(i)
            kamax = ia(i+1)-1
         else
            ka = 1
            kamax = 0
         end if
c
c     range kb..kbmax of the entries of B landing in row i. ibrow is
c     the matching row of B; it must satisfy 1 .le. ibrow .le. nrowb,
c     otherwise ib(ibrow+1) would be read past the end of ib.
c
         ibrow = i-ipos+1
         if (ibrow .ge. 1 .and. ibrow .le. nrowb) then
            kb = ib(ibrow)
            kbmax = ib(ibrow+1)-1
         else
            kb = 1
            kbmax = 0
         end if
c
c     a do-while type loop -- goes through all the elements in a row.
c     j1, j2 = next column of A, of B (shifted); ncolc+1 acts as an
c     end-of-row marker.
c
 20      continue
         if (ka .le. kamax) then
            j1 = ja(ka)
         else
            j1 = ncolc+1
         endif
         if (kb .le. kbmax) then
            j2 = jb(kb) + jpos - 1
         else
            j2 = ncolc+1
         endif
c
c     if there are more elements to be added.
c
         if ((ka .le. kamax .or. kb .le. kbmax) .and.
     &        (j1 .le. ncolc .or. j2 .le. ncolc)) then
c
c     three cases: same column in A and B, A first, B first
c
            if (j1 .eq. j2) then
               if (values) c(kc) = a(ka)+b(kb)
               jc(kc) = j1
               ka = ka+1
               kb = kb+1
               kc = kc+1
            else if (j1 .lt. j2) then
               jc(kc) = j1
               if (values) c(kc) = a(ka)
               ka = ka+1
               kc = kc+1
            else if (j1 .gt. j2) then
               jc(kc) = j2
               if (values) c(kc) = b(kb)
               kb = kb+1
               kc = kc+1
            endif
            if (kc .gt. nzmx) goto 999
            goto 20
         end if
         ic(i+1) = kc
 10   continue
      return
 999  ierr = i
      return
c---------end-of-addblk-------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine get1up (n,ja,ia,ju)
      integer  n, ja(*),ia(*),ju(*)
c----------------------------------------------------------------------
c Obtains, for each row of a CSR matrix, the position of the first
c entry of the upper triangular part (diagonal included).
c Assumes that the column indices of each row are sorted in
c increasing order.
c-----------------------------------------------------------------------
c on entry:
c----------
c n       = number of rows to process.
c ja      = integer array containing the column indices of aij
c ia      = pointer array. ia(j) contains the position of the
c           beginning of row j in ja and ia(j+1)-1 the position
c           of its last entry. Entries ia(1), ..., ia(n+1) are read.
c
c on return:
c-----------
c ju      = integer array of length n. ju(i) is the position in ja
c           of the first entry of row i whose column index is
c           .ge. i. Thus if row i is sorted and has a stored
c           diagonal entry, ju(i) points to it.
c           ju(i) = 0 when row i holds no such entry, which
c           includes the case of an empty row.
c           This is a more general version of diapos.
c
c Notes:
c-------
c The scan of row i is restricted to positions ia(i)..ia(i+1)-1.
c An empty row (ia(i) .eq. ia(i+1)) is therefore never inspected,
c so ja is not read outside the row (the position ia(i) of an
c empty row belongs to the next row, or lies past the end of ja
c for the last row).
c Positions are assumed to be 1-based (ia(i) .ge. 1), so that the
c value 0 can serve as the "not found" marker.
c-----------------------------------------------------------------------
c local variables
      integer i, k, klast, kfirst
c
      do i = 1, n
         kfirst = 0
         k = ia(i)
         klast = ia(i+1) - 1
c
c     walk along row i until an entry on or right of the diagonal
c     is met, or the row is exhausted (zero trips for an empty row)
c
         do while (kfirst .eq. 0 .and. k .le. klast)
            if (ja(k) .ge. i) kfirst = k
            k = k + 1
         enddo
         ju(i) = kfirst
      enddo
      return
c-----end-of-get1up-----------------------------------------------------
      end

c----------------------------------------------------------------------
      subroutine xtrows (i1,i2,a,ja,ia,ao,jao,iao,iperm,job)
      integer i1,i2,ja(*),ia(*),jao(*),iao(*),iperm(*),job
      real*8 a(*),ao(*)
c-----------------------------------------------------------------------
c Extracts a set of rows from a matrix stored in CSR format.
c Rows iperm(i1), iperm(i1+1), ..., iperm(i2) of the input matrix
c are copied, in that order, into the output matrix ao, jao, iao
c (also CSR).  NOT in place.
c Youcef Saad -- coded Feb 15, 1992.
c-----------------------------------------------------------------------
c on entry:
c----------
c i1,i2   = two integers selecting the range of iperm to be used.
c           Input row iperm(j) becomes output row j-i1+1 for
c           j = i1, ..., i2.
c
c a, ja, ia = input matrix in csr format
c
c iperm   = integer array holding the indices of the input rows to
c           be extracted; entries iperm(i1), ..., iperm(i2) are read.
c
c job     = integer indicating the work to be done:
c           job = 1    : copy both the structure and the real values.
c           job .ne. 1 : copy the structure only (jao, iao); the
c                        arrays a and ao are then not accessed.
c------------
c on return:
c------------
c ao, jao, iao = output matrix holding the extracted rows in csr
c           format. iao(1) = 1 and iao has i2-i1+2 entries when
c           i2 .ge. i1. ao is filled only when job = 1.
c----------------------------------------------------------------------c
c           Y. Saad, revised May  2, 1990                              c
c----------------------------------------------------------------------c
c local variables
      integer irow, isrc, k, kbeg, kend, nxt, ishift
      logical values
c
      values = (job .eq. 1)
c
c nxt is the next free position in jao / ao
c
      nxt = 1
      iao(1) = nxt
      do irow = i1, i2
c
c isrc is the index of the input row to be copied; its entries
c occupy positions kbeg..kend of ja (and a)
c
         isrc = iperm(irow)
         kbeg = ia(isrc)
         kend = ia(isrc+1) - 1
c
c input position k maps to output position k+ishift
c
         ishift = nxt - kbeg
         do k = kbeg, kend
            jao(k+ishift) = ja(k)
         enddo
         if (values) then
            do k = kbeg, kend
               ao(k+ishift) = a(k)
            enddo
         endif
c
c advance only when the row actually holds entries
c
         if (kend .ge. kbeg) nxt = nxt + (kend - kbeg + 1)
         iao(irow-i1+2) = nxt
      enddo
c
      return
c---------end-of-xtrows-------------------------------------------------
c-----------------------------------------------------------------------
      end
c-----------------------------------------------------------------------
      subroutine csrkvstr(n, ia, ja, nr, kvstr)
c-----------------------------------------------------------------------
      integer n, ia(n+1), ja(*), nr, kvstr(*)
c-----------------------------------------------------------------------
c     Finds block row partitioning of matrix in CSR format.
c     Consecutive scalar rows are placed in the same block row when
c     they hold the same number of entries and exactly the same
c     sequence of column indices.
c-----------------------------------------------------------------------
c     On entry:
c--------------
c     n       = number of matrix scalar rows
c     ia,ja   = input matrix sparsity structure in CSR format
c
c     On return:
c---------------
c     nr      = number of block rows
c     kvstr   = first row number for each block row;
c               kvstr(1) = 1 and kvstr(nr+1) = n+1
c
c     Notes:
c-----------
c     Assumes that the matrix is sorted by columns.
c     This routine does not need any workspace.
c
c-----------------------------------------------------------------------
c     local variables
      integer i, j, rowlen
      logical same
c-----------------------------------------------------------------------
      nr = 1
      kvstr(1) = 1
c---------------------------------
      do i = 2, n
c
c     row i can only share a block with row i-1 when both rows hold
c     the same number of entries
c
         rowlen = ia(i+1) - ia(i)
         same = (rowlen .eq. ia(i) - ia(i-1))
c
c     entry j of row i is matched against entry j-rowlen, which is
c     the entry at the same offset in row i-1; stop at the first
c     mismatch
c
         j = ia(i)
         do while (same .and. j .lt. ia(i+1))
            same = (ja(j) .eq. ja(j-rowlen))
            j = j + 1
         enddo
c
c     a differing pattern opens a new block row starting at row i
c
         if (.not. same) then
            nr = nr + 1
            kvstr(nr) = i
         endif
      enddo
      kvstr(nr+1) = n+1
c---------------------------------
      return
      end
c-----------------------------------------------------------------------
c------------------------end-of-csrkvstr--------------------------------
      subroutine csrkvstc(n, ia, ja, nc, kvstc, iwk)
c-----------------------------------------------------------------------
      integer n, ia(n+1), ja(*), nc, kvstc(*), iwk(*)
c-----------------------------------------------------------------------
c     Finds block column partitioning of matrix in CSR format.
c     Within each row, every run of consecutive column indices
c     marks a block boundary at its first column and at the column
c     following its last one; the block columns are the intervals
c     between successive marks.
c-----------------------------------------------------------------------
c     On entry:
c--------------
c     n       = number of matrix scalar rows
c     ia,ja   = input matrix sparsity structure in CSR format
c
c     On return:
c---------------
c     nc      = number of block columns
c     kvstc   = first column number for each block column;
c               kvstc(1) = 1 and, when the matrix has at least one
c               entry, kvstc(nc+1) = (largest last-in-row column
c               index) + 1
c
c     Work space:
c----------------
c     iwk(*) of size equal to the number of scalar columns plus one.
c        Assumed initialized to 0, and left initialized on return.
c
c     Notes:
c-----------
c     Assumes that the matrix is sorted by columns.
c     iwk(1) is marked whenever a row starts in column 1 but is
c     never examined by the collection loop (column 1 always opens
c     the first block), so it is cleared explicitly to honour the
c     "left initialized on return" contract of the work array.
c
c-----------------------------------------------------------------------
c     local variables
      integer i, j, k, ncol
c
c-----------------------------------------------------------------------

c-----use ncol to find maximum scalar column number
      ncol = 0

c-----mark the beginning position of the blocks in iwk
      do i = 1, n
         if (ia(i) .lt. ia(i+1)) then
c-----------first column of the row opens a block
            j = ja(ia(i))
            iwk(j) = 1
            do k = ia(i)+1, ia(i+1)-1
               j = ja(k)
c--------------a gap in the column indices closes the previous run
c--------------(boundary after ja(k-1)) and opens a new one at j
               if (ja(k-1).ne.j-1) then
                  iwk(j) = 1
                  iwk(ja(k-1)+1) = 1
               endif
            enddo
c-----------j is now the last column of the row: boundary after it
            iwk(j+1) = 1
            ncol = max(ncol, j)
         endif
      enddo
c---------------------------------
c-----iwk(1) is not visited below; reset it here. Nothing was
c-----marked when ncol is 0, so iwk is then left untouched.
      if (ncol .ge. 1) iwk(1) = 0
c---------------------------------
c-----collect the marks into kvstc, clearing iwk on the way
      nc = 1
      kvstc(1) = 1
      do i = 2, ncol+1
         if (iwk(i).ne.0) then
            nc = nc + 1
            kvstc(nc) = i
            iwk(i) = 0
         endif
      enddo
c-----the last mark is the end sentinel, not the start of a block
      nc = nc - 1
c---------------------------------
      return
      end
c-----------------------------------------------------------------------
c------------------------end-of-csrkvstc--------------------------------
c-----------------------------------------------------------------------
      subroutine kvstmerge(nr, kvstr, nc, kvstc, n, kvst)
c-----------------------------------------------------------------------
      integer nr, kvstr(nr+1), nc, kvstc(nc+1), n, kvst(*)
c-----------------------------------------------------------------------
c     Merges block partitionings, for conformal row/col pattern.
c     The two increasing lists kvstr(1:nr+1) and kvstc(1:nc+1) are
c     merged into kvst; a value present in both lists at the
c     current merge position is stored once.
c-----------------------------------------------------------------------
c     On entry:
c--------------
c     nr,nc   = matrix block row and block column dimension
c     kvstr   = first row number for each block row
c     kvstc   = first column number for each block column
c
c     On return:
c---------------
c     n       = conformal row/col matrix block dimension
c               (number of entries written to kvst, minus one)
c     kvst    = conformal row/col block partitioning
c
c     Notes:
c-----------
c     If matrix is not square, i.e. kvstr(nr+1) differs from
c     kvstc(nc+1), this routine returns without warning and
c     leaves n and kvst untouched.
c
c-----------------------------------------------------------------------
c-----local variables
      integer ir, jc, pos
      logical more, taker, takec
c---------------------------------

      if (kvstr(nr+1) .ne. kvstc(nc+1)) return

c-----ir, jc : read cursors in kvstr, kvstc; pos : write cursor
      ir = 1
      jc = 1
      pos = 1
      more = .true.
      do while (more)
c--------decide which list(s) supply the next value
         if (ir .gt. nr+1) then
c-----------row list exhausted
            takec = .true.
            taker = .false.
         elseif (jc .gt. nc+1) then
c-----------column list exhausted
            takec = .false.
            taker = .true.
         else
c-----------smaller head wins; on a tie both cursors advance
            takec = (kvstc(jc) .le. kvstr(ir))
            taker = (kvstr(ir) .le. kvstc(jc))
         endif
         if (takec) then
            kvst(pos) = kvstc(jc)
            jc = jc + 1
         else
            kvst(pos) = kvstr(ir)
         endif
         if (taker) ir = ir + 1
         pos = pos + 1
         more = (ir .le. nr+1) .or. (jc .le. nc+1)
      enddo
c-----pos is one past the last entry written; the number of blocks
c-----is the number of entries minus one
      n = pos - 2
c---------------------------------
      return
c------------------------end-of-kvstmerge-------------------------------
      end