1c $Id: reordering.f,v 1.1 2008-04-11 06:01:06 geuzaine Exp $
2c----------------------------------------------------------------------c
3c                          S P A R S K I T                             c
4c----------------------------------------------------------------------c
5c               ROERDERING ROUTINES -- LEVEL SET BASED ROUTINES        c
6c----------------------------------------------------------------------c
7c BSF     : Breadth-First Seearch traversal (Cuthill mc kee ordering)  c
8c dblstr  : two-way dissection partitioning -- with equal size domains c
9c stripes : routine used by dblstr to assign points                    c
10c perphn  : finds a peripheral node and does a BFS search from it.     c
11c add_lvst: routine for adding a new level set in BFS algorithm        c
12c reversp : routine to reverse a given permuation (e.g., for RCMK)     c
13c maskdeg : integer function to compute the `masked' of a node         c
14c----------------------------------------------------------------------c
15      subroutine BFS(n,ja,ia,nfirst,iperm,mask,maskval,riord,levels,
16     *     nlev)
17      implicit none
18      integer n,ja(*),ia(*),nfirst,iperm(n),mask(n),riord(*),levels(*),
19     *     nlev,maskval
20c-----------------------------------------------------------------------
21c finds the level-structure (breadth-first-search or CMK) ordering for a
22c given sparse matrix. Uses add_lvst. Allows a  set of nodes to be
23c the initial level (instead of just one node). Allows masked nodes.
24c-------------------------parameters------------------------------------
25c on entry:
26c----------
27c n      = number of nodes in the graph
28c ja, ia = pattern of matrix in CSR format (the ja,ia arrays of csr data
29c          structure)
30c nfirst = number of nodes in the first level that is input in riord
31c iperm  = integer array indicating in which order to  traverse the graph
32c          in order to generate all connected components.
33c          The nodes will be traversed in order iperm(1),....,iperm(n)
34c          Convention:
35c          if iperm(1) .eq. 0 on entry then BFS will traverse the
36c          nodes in the  order 1,2,...,n.
37c
38c riord  = (also an ouput argument). on entry riord contains the labels
39c          of the nfirst nodes that constitute the first level.
40c
41c mask   = array used to indicate whether or not a node should be
42c          condidered in the graph. see maskval.
43c          mask is also used as a marker of  visited nodes.
44c
45c maskval= consider node i only when:  mask(i) .eq. maskval
46c          maskval must be .gt. 0.
47c          thus, to consider all nodes, take mask(1:n) = 1.
48c          maskval=1 (for example)
49c
50c on return
51c ---------
52c mask   = on return mask is restored to its initial state.
53c riord  = `reverse permutation array'. Contains the labels of the nodes
54c          constituting all the levels found, from the first level to
55c          the last.
56c levels = pointer array for the level structure. If lev is a level
57c          number, and k1=levels(lev),k2=levels(lev+1)-1, then
58c          all the nodes of level number lev are:
59c          riord(k1),riord(k1+1),...,riord(k2)
60c nlev   = number of levels found
61c-----------------------------------------------------------------------
62c Notes on possible usage
63c-------------------------
64c 1. if you want a CMK ordering from a known node, say node init then
65c    call BFS with nfirst=1,iperm(1) =0, mask(1:n) =1, maskval =1,
66c    riord(1) = init.
67c 2. if you want the RCMK ordering and you have a preferred initial node
68c     then use above call followed by reversp(n,riord)
69c 3. Similarly to 1, and 2, but you know a good LEVEL SET to start from
70c    (nfirst = number if nodes in the level, riord(1:nfirst) contains
71c    the nodes.
72c 4. If you do not know how to select a good initial node in 1 and 2,
73c    then you should use perphn instead.
74c
75c-----------------------------------------------------------------------
76c     local variables --
77      integer j, ii, nod, istart, iend
78      logical permut
79      permut = (iperm(1) .ne. 0)
80c
81c     start pointer structure to levels
82c
83      nlev   = 0
84c
85c     previous end
86c
87      istart = 0
88      ii = 0
89c
90c     current end
91c
92      iend = nfirst
93c
94c     intialize masks to zero -- except nodes of first level --
95c
96      do 12 j=1, nfirst
97         mask(riord(j)) = 0
98 12   continue
99c-----------------------------------------------------------------------
100 13   continue
101c
102 1    nlev = nlev+1
103      levels(nlev) = istart + 1
104      call add_lvst (istart,iend,nlev,riord,ja,ia,mask,maskval)
105      if (istart .lt. iend) goto 1
106 2    ii = ii+1
107      if (ii .le. n) then
108         nod = ii
109         if (permut) nod = iperm(nod)
110         if (mask(nod) .eq. maskval) then
111c
112c     start a new level
113c
114            istart = iend
115            iend = iend+1
116            riord(iend) = nod
117            mask(nod) = 0
118            goto 1
119         else
120            goto 2
121         endif
122      endif
123c-----------------------------------------------------------------------
124 3    levels(nlev+1) = iend+1
125      do j=1, iend
126         mask(riord(j)) = maskval
127      enddo
128      return
129c-----------------------------------------------------------------------
130c-----end-of-BFS--------------------------------------------------------
131      end
132c-----------------------------------------------------------------------
133      subroutine dblstr(n,ja,ia,ip1,ip2,nfirst,riord,ndom,map,mapptr,
134     *     mask,levels,iwk)
135      implicit none
136      integer ndom,ja(*),ia(*),ip1,ip2,nfirst,riord(*),map(*),mapptr(*),
137     *     mask(*),levels(*),iwk(*),nextdom
138c-----------------------------------------------------------------------
139c this routine performs a two-way partitioning of a graph using
140c level sets recursively. First a coarse set is found by a
141c simple cuthill-mc Kee type algorithm. Them each of the large
142c domains is further partitioned into subsets using the same
143c technique. The ip1 and ip2 parameters indicate the desired number
144c number of partitions 'in each direction'. So the total number of
145c partitions on return ought to be equal (or close) to ip1*ip2
146c----------------------parameters----------------------------------------
147c on entry:
148c---------
149c n      = row dimension of matrix == number of vertices in graph
150c ja, ia = pattern of matrix in CSR format (the ja,ia arrays of csr data
151c          structure)
152c ip1    = integer indicating the number of large partitions ('number of
153c          paritions in first direction')
154c ip2    = integer indicating the number of smaller partitions, per
155c          large partition, ('number of partitions in second direction')
156c nfirst = number of nodes in the first level that is input in riord
157c riord  = (also an ouput argument). on entry riord contains the labels
158c          of the nfirst nodes that constitute the first level.
159c on return:
160c-----------
161c ndom   = total number of partitions found
162c map    = list of nodes listed partition by pertition from partition 1
163c          to paritition ndom.
164c mapptr = pointer array for map. All nodes from position
165c          k1=mapptr(idom),to position k2=mapptr(idom+1)-1 in map belong
166c          to partition idom.
167c work arrays:
168c-------------
169c mask   = array of length n, used to hold the partition number of each
170c          node for the first (large) partitioning.
171c          mask is also used as a marker of  visited nodes.
172c levels = integer array of length .le. n used to hold the pointer
173c          arrays for the various level structures obtained from BFS.
174c-----------------------------------------------------------------------
175      integer n, j,idom,kdom,jdom,maskval,k,nlev,init,ndp1,numnod
176      maskval = 1
177      do j=1, n
178         mask(j) = maskval
179      enddo
180      iwk(1) = 0
181      call BFS(n,ja,ia,nfirst,iwk,mask,maskval,riord,levels,nlev)
182c      init = riord(1)
183c      call perphn (ja,ia,mask,maskval,init,nlev,riord,levels)
184      call stripes (nlev,riord,levels,ip1,map,mapptr,ndom)
185c-----------------------------------------------------------------------
186      if (ip2 .eq. 1) return
187      ndp1 = ndom+1
188c
189c     pack info into array iwk
190c
191      do j = 1, ndom+1
192         iwk(j) = ndp1+mapptr(j)
193      enddo
194      do j=1, mapptr(ndom+1)-1
195         iwk(ndp1+j) = map(j)
196      enddo
197c-----------------------------------------------------------------------
198      do idom=1, ndom
199         do k=mapptr(idom),mapptr(idom+1)-1
200            mask(map(k)) = idom
201         enddo
202      enddo
203      nextdom = 1
204c
205c     jdom = counter for total number of (small) subdomains
206c
207      jdom = 1
208      mapptr(jdom) = 1
209c-----------------------------------------------------------------------
210      do idom =1, ndom
211         maskval = idom
212         nfirst = 1
213         numnod = iwk(idom+1) - iwk(idom)
214         j = iwk(idom)
215         init = iwk(j)
216         nextdom = mapptr(jdom)
217         call perphn(numnod,ja,ia,init,iwk(j),mask,maskval,
218     *        nlev,riord,levels)
219         call stripes (nlev,riord,levels,ip2,map(nextdom),
220     *        mapptr(jdom),kdom)
221         mapptr(jdom) = nextdom
222         do j = jdom,jdom+kdom-1
223            mapptr(j+1) = nextdom + mapptr(j+1)-1
224         enddo
225         jdom = jdom + kdom
226      enddo
227c
228      ndom = jdom - 1
229      return
230      end
231c-----------------------------------------------------------------------
232      subroutine perphn(n,ja,ia,init,iperm,mask,maskval,nlev,riord,
233     *     levels)
234      implicit none
235      integer n,ja(*),ia(*),init,iperm(*),mask(*),maskval,
236     *     nlev,riord(*),levels(*)
237c-----------------------------------------------------------------------
238c     finds a pseudo-peripheral node and does a BFS search from it.
239c-----------------------------------------------------------------------
240c see routine  dblstr for description of parameters
241c input:
242c-------
243c ja, ia  = list pointer array for the adjacency graph
244c mask    = array used for masking nodes -- see maskval
245c maskval = value to be checked against for determing whether or
246c           not a node is masked. If mask(k) .ne. maskval then
247c           node k is not considered.
248c init    = init node in the pseudo-peripheral node algorithm.
249c
250c output:
251c-------
252c init    = actual pseudo-peripherial node found.
253c nlev    = number of levels in the final BFS traversal.
254c riord   =
255c levels  =
256c-----------------------------------------------------------------------
257      integer j,nlevp,deg,nfirst,mindeg,nod,maskdeg
258      nlevp = 0
259 1    continue
260      riord(1) = init
261      nfirst = 1
262      call BFS(n,ja,ia,nfirst,iperm,mask,maskval,riord,levels,nlev)
263      if (nlev .gt. nlevp) then
264         mindeg = levels(nlev+1)-1
265         do j=levels(nlev),levels(nlev+1)-1
266            nod = riord(j)
267            deg = maskdeg(ja,ia,nod,mask,maskval)
268            if (deg .lt. mindeg) then
269               init = nod
270               mindeg = deg
271            endif
272         enddo
273         nlevp = nlev
274         goto 1
275      endif
276      return
277      end
278c-----------------------------------------------------------------------
279      subroutine add_lvst(istart,iend,nlev,riord,ja,ia,mask,maskval)
280      integer nlev, nod, riord(*), ja(*), ia(*), mask(*)
281c----------------------------------------------------------------------
282c adds one level set to the previous sets. span all nodes of previous
283c set. Uses Mask to mark those already visited.
284c-----------------------------------------------------------------------
285      nod = iend
286      do 25 ir = istart+1,iend
287         i = riord(ir)
288         do 24 k=ia(i),ia(i+1)-1
289            j = ja(k)
290            if (mask(j) .eq. maskval) then
291               nod = nod+1
292               mask(j) = 0
293               riord(nod) = j
294            endif
295 24      continue
296 25   continue
297      istart = iend
298      iend   = nod
299      return
300c-----------------------------------------------------------------------
301      end
302c-----------------------------------------------------------------------
303      subroutine stripes (nlev,riord,levels,ip,map,mapptr,ndom)
304      implicit none
305      integer nlev,riord(*),levels(nlev+1),ip,map(*),
306     *    mapptr(*), ndom
307c-----------------------------------------------------------------------
308c    this is a post processor to BFS. stripes uses the output of BFS to
309c    find a decomposition of the adjacency graph by stripes. It fills
310c    the stripes level by level until a number of nodes .gt. ip is
311c    is reached.
312c---------------------------parameters-----------------------------------
313c on entry:
314c --------
315c nlev   = number of levels as found by BFS
316c riord  = reverse permutation array produced by BFS --
317c levels = pointer array for the level structure as computed by BFS. If
318c          lev is a level number, and k1=levels(lev),k2=levels(lev+1)-1,
319c          then all the nodes of level number lev are:
320c                      riord(k1),riord(k1+1),...,riord(k2)
321c  ip    = number of desired partitions (subdomains) of about equal size.
322c
323c on return
324c ---------
325c ndom     = number of subgraphs (subdomains) found
326c map      = node per processor list. The nodes are listed contiguously
327c            from proc 1 to nproc = mpx*mpy.
328c mapptr   = pointer array for array map. list for proc. i starts at
329c            mapptr(i) and ends at mapptr(i+1)-1 in array map.
330c-----------------------------------------------------------------------
331c local variables.
332c
333      integer ib,ktr,ilev,k,nsiz,psiz
334      ndom = 1
335      ib = 1
336c to add: if (ip .le. 1) then ...
337      nsiz = levels(nlev+1) - levels(1)
338      psiz = (nsiz-ib)/max(1,(ip - ndom + 1)) + 1
339      mapptr(ndom) = ib
340      ktr = 0
341      do 10 ilev = 1, nlev
342c
343c     add all nodes of this level to domain
344c
345         do 3 k=levels(ilev), levels(ilev+1)-1
346            map(ib) = riord(k)
347            ib = ib+1
348            ktr = ktr + 1
349            if (ktr .ge. psiz  .or. k .ge. nsiz) then
350               ndom = ndom + 1
351               mapptr(ndom) = ib
352               psiz = (nsiz-ib)/max(1,(ip - ndom + 1)) + 1
353               ktr = 0
354            endif
355c
356 3       continue
357 10   continue
358      ndom = ndom-1
359      return
360c-----------------------------------------------------------------------
361c-----end-of-stripes----------------------------------------------------
362      end
363c-----------------------------------------------------------------------
364      subroutine rversp (n, riord)
365      integer n, riord(n)
366c-----------------------------------------------------------------------
367c     this routine does an in-place reversing of the permutation array
368c     riord --
369c-----------------------------------------------------------------------
370      integer j, k
371      do 26 j=1,n/2
372         k = riord(j)
373         riord(j) = riord(n-j+1)
374         riord(n-j+1) = k
375 26   continue
376      return
377      end
378c-----------------------------------------------------------------------
379      integer function maskdeg  (ja,ia,nod,mask,maskval)
380      implicit none
381      integer ja(*),ia(*),nod,mask(*),maskval
382c-----------------------------------------------------------------------
383      integer deg, k
384      deg = 0
385      do k =ia(nod),ia(nod+1)-1
386         if (mask(ja(k)) .eq. maskval) deg = deg+1
387      enddo
388      maskdeg = deg
389      return
390      end
391c-----------------------------------------------------------------------
392