1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
6module hecmw_local_matrix
7  use hecmw_util
8  use hecmw_pair_array
9
10  private
11  public :: hecmwST_local_matrix
12  public :: hecmw_localmat_write
13  public :: hecmw_localmat_blocking
14  public :: hecmw_localmat_free
15  public :: hecmw_localmat_mulvec
16  public :: hecmw_trimatmul_TtKT
17  public :: hecmw_trimatmul_TtKT_mpc
18  public :: hecmw_localmat_transpose
19  public :: hecmw_localmat_assemble
20  public :: hecmw_localmat_add
21  public :: hecmw_localmat_init_with_hecmat
22  public :: hecmw_localmat_add_hecmat
23  public :: hecmw_localmat_multmat
24  public :: hecmw_localmat_make_hecmat
25  public :: hecmw_localmat_shrink_comm_table
26
27  type hecmwST_local_matrix
28    integer :: nr, nc, nnz, ndof
29    integer(kind=kint), pointer :: index(:)
30    integer(kind=kint), pointer :: item(:)
31    real(kind=kreal), pointer :: A(:)
32  end type hecmwST_local_matrix
33
34  integer(kind=kint), parameter :: cNCOL_ITEM = 2 !< num of column items to be migrated (2 or 3)
35  integer(kind=kint), parameter :: cLID = 1       !< index for local ID in belonging rank (node_ID(2*i-1))
36  integer(kind=kint), parameter :: cRANK = 2      !< index for belonging rank (node_ID(2*i))
37  integer(kind=kint), parameter :: cGID = 3       !< index for global ID (used only when cNCOL_ITEM==3)
38
39  integer(kind=kint), parameter :: DEBUG = 0
40  integer(kind=kint), parameter :: TIMER = 0
41
42contains
43
44  subroutine hecmw_localmat_write(Tmat,iunit)
45    implicit none
46    type (hecmwST_local_matrix), intent(in) :: Tmat
47    integer(kind=kint), intent(in) :: iunit
48    integer(kind=kint) :: nr, nc, nnz, ndof, ndof2, i, js, je, j, jj
49    nr=Tmat%nr
50    nc=Tmat%nc
51    nnz=Tmat%nnz
52    ndof=Tmat%ndof
53    ndof2=ndof*ndof
54    write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
55    write(iunit,*) 'i, j, A'
56    do i=1,nr
57      js=Tmat%index(i-1)+1
58      je=Tmat%index(i)
59      do j=js,je
60        jj=Tmat%item(j)
61        if (ndof==1) then
62          write(iunit,*) i, jj, Tmat%A(j)
63        else
64          write(iunit,*) i, jj
65          write(iunit,*) Tmat%A((j-1)*ndof2+1:j*ndof2)
66        endif
67      enddo
68    enddo
69  end subroutine hecmw_localmat_write
70
71  subroutine hecmw_localmat_write_ij(Tmat,iunit)
72    implicit none
73    type (hecmwST_local_matrix), intent(in) :: Tmat
74    integer(kind=kint), intent(in) :: iunit
75    integer(kind=kint) :: nr, nc, nnz, ndof, i, js, je, j, jj
76    nr=Tmat%nr
77    nc=Tmat%nc
78    nnz=Tmat%nnz
79    ndof=Tmat%ndof
80    write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
81    write(iunit,*) 'i, j'
82    do i=1,nr
83      js=Tmat%index(i-1)+1
84      je=Tmat%index(i)
85      do j=js,je
86        jj=Tmat%item(j)
87        write(iunit,*) i, jj
88      enddo
89    enddo
90  end subroutine hecmw_localmat_write_ij
91
92  subroutine hecmw_localmat_blocking(Tmat, ndof, BTmat)
93    implicit none
94    type (hecmwST_local_matrix), intent(in) :: Tmat
95    integer, intent(in) :: ndof
96    type (hecmwST_local_matrix), intent(out) :: BTmat
97    integer, allocatable :: iw(:)
98    integer :: ndof2, i, icnt, idof, idx, ls, le, l, j, jb, k, lb0, jdof, ks, ke
99    ndof2=ndof*ndof
100
101    if (mod(Tmat%nr, ndof) /= 0 .or. mod(Tmat%nc, ndof) /= 0) then
102      write(0,*) Tmat%nr, Tmat%nc, ndof
103      stop 'ERROR: blocking_Tmat failed'
104    endif
105    BTmat%nr=Tmat%nr/ndof
106    BTmat%nc=Tmat%nc/ndof
107    BTmat%ndof=ndof
108
109    allocate(iw(BTmat%nc))
110    allocate(BTmat%index(0:BTmat%nr))
111
112    BTmat%index(0)=0
113    do i=1,BTmat%nr
114      icnt=0
115      do idof=1,ndof
116        idx=(i-1)*ndof+idof
117        ls=Tmat%index(idx-1)+1
118        le=Tmat%index(idx)
119        lcol: do l=ls,le
120          j=Tmat%item(l)
121          jb=(j-1)/ndof+1
122          do k=1,icnt
123            if (iw(k)==jb) cycle lcol
124          enddo
125          icnt=icnt+1
126          iw(icnt)=jb
127        enddo lcol
128      enddo
129      BTmat%index(i)=BTmat%index(i-1)+icnt
130    enddo
131
132    BTmat%nnz=BTmat%index(BTmat%nr)
133    allocate(BTmat%item(BTmat%nnz))
134    allocate(BTmat%A(BTmat%nnz*ndof2))
135    BTmat%A=0.d0
136
137    do i=1,BTmat%nr
138      icnt=0
139      do idof=1,ndof
140        idx=(i-1)*ndof+idof
141        ls=Tmat%index(idx-1)+1
142        le=Tmat%index(idx)
143        lcol2: do l=ls,le
144          j=Tmat%item(l)
145          jb=(j-1)/ndof+1
146          do k=1,icnt
147            if (iw(k)==jb) cycle lcol2
148          enddo
149          icnt=icnt+1
150          iw(icnt)=jb
151        enddo lcol2
152      enddo
153      ! if (icnt /= BTmat%index(i)-BTmat%index(i-1)) stop 'ERROR: blocking Tmat'
154      ! ! call qsort(iw, 1, icnt)
155      lb0=BTmat%index(i-1)
156      do k=1,icnt
157        BTmat%item(lb0+k)=iw(k)
158      enddo
159      do idof=1,ndof
160        idx=(i-1)*ndof+idof
161        ls=Tmat%index(idx-1)+1
162        le=Tmat%index(idx)
163        lcol3: do l=ls,le
164          j=Tmat%item(l)
165          jb=(j-1)/ndof+1
166          jdof=mod((j-1), ndof)+1
167          ks=BTmat%index(i-1)+1
168          ke=BTmat%index(i)
169          do k=ks,ke
170            if (BTmat%item(k)==jb) then
171              BTmat%A((k-1)*ndof2+(idof-1)*ndof+jdof)=Tmat%A(l)
172              cycle lcol3
173            endif
174          enddo
175          stop 'ERROR: something wrong in blocking Tmat'
176        enddo lcol3
177      enddo
178    enddo
179  end subroutine hecmw_localmat_blocking
180
181  subroutine hecmw_localmat_free(Tmat)
182    implicit none
183    type (hecmwST_local_matrix), intent(inout) :: Tmat
184    deallocate(Tmat%index)
185    if (associated(Tmat%item)) deallocate(Tmat%item)
186    if (associated(Tmat%A)) deallocate(Tmat%A)
187    Tmat%nr=0
188    Tmat%nc=0
189    Tmat%nnz=0
190    Tmat%ndof=0
191  end subroutine hecmw_localmat_free
192
193  subroutine hecmw_trimatmul_TtKT(hecMESH, BTtmat, hecMAT, BTmat, &
194      iwS, num_lagrange, hecTKT)
195    use hecmw_matrix_misc
196    implicit none
197    type (hecmwST_local_mesh), intent(inout) :: hecMESH
198    type (hecmwST_local_matrix), intent(inout) :: BTtmat, BTmat
199    type (hecmwST_matrix), intent(in) :: hecMAT
200    integer(kind=kint), intent(in) :: iwS(:)
201    integer(kind=kint), intent(in) :: num_lagrange
202    type (hecmwST_matrix), intent(inout) :: hecTKT
203    if (hecMESH%n_neighbor_pe == 0) then
204      call hecmw_trimatmul_TtKT_serial(hecMESH, BTtmat, hecMAT, BTmat, &
205           iwS, num_lagrange, hecTKT)
206    else
207      call hecmw_trimatmul_TtKT_parallel(hecMESH, BTtmat, hecMAT, BTmat, &
208           iwS, num_lagrange, hecTKT)
209    endif
210  end subroutine hecmw_trimatmul_TtKT
211
212  subroutine hecmw_trimatmul_TtKT_serial(hecMESH, BTtmat, hecMAT, BTmat, &
213      iwS, num_lagrange, hecTKT)
214    use hecmw_matrix_misc
215    implicit none
216    type (hecmwST_local_mesh), intent(in) :: hecMESH
217    type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat
218    type (hecmwST_matrix), intent(in) :: hecMAT
219    integer(kind=kint), intent(in) :: iwS(:)
220    integer(kind=kint), intent(in) :: num_lagrange
221    type (hecmwST_matrix), intent(inout) :: hecTKT
222    type (hecmwST_local_matrix) :: BTtKT
223    real(kind=kreal) :: num
224
225    ! perform three matrices multiplication for elimination
226    call trimatmul_TtKT(BTtmat, hecMAT, BTmat, BTtKT)
227    !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)'
228    !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank())
229
230    ! place small numbers where the DOF is eliminated
231    !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
232    num = 1.d0
233    call place_num_on_diag(BTtKT, iwS, num_lagrange, num)
234    !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)'
235    !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank())
236
237    ! make_new HECMW matrix
238    call make_new_hecmat(hecMAT, BTtKT, hecTKT)
239    call hecmw_localmat_free(BTtKT)
240  end subroutine hecmw_trimatmul_TtKT_serial
241
242  subroutine hecmw_trimatmul_TtKT_parallel(hecMESH, BTtmat, hecMAT, BTmat, &
243      iwS, num_lagrange, hecTKT)
244    use hecmw_matrix_misc
245    implicit none
246    type (hecmwST_local_mesh), intent(inout) :: hecMESH
247    type (hecmwST_local_matrix), intent(inout) :: BTtmat, BTmat
248    type (hecmwST_matrix), intent(in) :: hecMAT
249    integer(kind=kint), intent(in) :: iwS(:)
250    integer(kind=kint), intent(in) :: num_lagrange
251    type (hecmwST_matrix), intent(inout) :: hecTKT
252    type (hecmwST_local_matrix) :: BKmat, BTtKmat, BTtKTmat
253    real(kind=kreal) :: num
254    real(kind=kreal) :: t0, t1
255
256    ! perform three matrices multiplication for elimination
257    t0 = hecmw_wtime()
258    call hecmw_localmat_init_with_hecmat(BKmat, hecMAT)
259    if (DEBUG >= 3) then
260      write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)'
261      if (DEBUG == 3) then
262        call hecmw_localmat_write_ij(BKmat, 700+hecmw_comm_get_rank())
263      else
264        call hecmw_localmat_write(BKmat, 700+hecmw_comm_get_rank())
265      endif
266    endif
267    t1 = hecmw_wtime()
268    if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (1) : ",t1-t0
269
270    t0 = hecmw_wtime()
271    call hecmw_localmat_multmat(BTtmat, BKmat, hecMESH, BTtKmat)
272    if (DEBUG >= 2) write(0,*) '  DEBUG2: multiply Tt and K done'
273    if (DEBUG >= 3) then
274      write(700+hecmw_comm_get_rank(),*) 'BTtKmat'
275      if (DEBUG == 3) then
276        call hecmw_localmat_write_ij(BTtKmat, 700+hecmw_comm_get_rank())
277      else
278        call hecmw_localmat_write(BTtKmat, 700+hecmw_comm_get_rank())
279      endif
280    endif
281    call hecmw_localmat_free(BKmat)
282    t1 = hecmw_wtime()
283    if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (2) : ",t1-t0
284
285    t0 = hecmw_wtime()
286    call hecmw_localmat_multmat(BTtKmat, BTmat, hecMESH, BTtKTmat)
287    if (DEBUG >= 2) write(0,*) '  DEBUG2: multiply TtK and T done'
288    if (DEBUG >= 3) then
289      write(700+hecmw_comm_get_rank(),*) 'BTtKTmat'
290      if (DEBUG == 3) then
291        call hecmw_localmat_write_ij(BTtKTmat, 700+hecmw_comm_get_rank())
292      else
293        call hecmw_localmat_write(BTtKTmat, 700+hecmw_comm_get_rank())
294      endif
295    endif
296    call hecmw_localmat_free(BTtKmat)
297    t1 = hecmw_wtime()
298    if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (3) : ",t1-t0
299
300    t0 = hecmw_wtime()
301    ! place small numbers where the DOF is eliminated
302    !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
303    num = 1.d0
304    call place_num_on_diag(BTtKTmat, iwS, num_lagrange, num)
305    if (DEBUG >= 3) then
306      write(700+hecmw_comm_get_rank(),*) 'num_lagrange =', num_lagrange
307      write(700+hecmw_comm_get_rank(),*) 'iwS(1:num_lagrange)'
308      write(700+hecmw_comm_get_rank(),*) iwS(1:num_lagrange)
309      write(700+hecmw_comm_get_rank(),*) 'BTtKTmat (place 1.0 on slave diag)'
310      if (DEBUG == 3) then
311        call hecmw_localmat_write_ij(BTtKTmat, 700+hecmw_comm_get_rank())
312      else
313        call hecmw_localmat_write(BTtKTmat, 700+hecmw_comm_get_rank())
314      endif
315    endif
316    t1 = hecmw_wtime()
317    if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (4) : ",t1-t0
318
319    t0 = hecmw_wtime()
320    ! make_new HECMW matrix
321    call make_new_hecmat(hecMAT, BTtKTmat, hecTKT)
322    call hecmw_localmat_free(BTtKTmat)
323    t1 = hecmw_wtime()
324    if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (5) : ",t1-t0
325  end subroutine hecmw_trimatmul_TtKT_parallel
326
327  subroutine trimatmul_TtKT(BTtmat, hecMAT, BTmat, BTtKT)
328    implicit none
329    type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat
330    type (hecmwST_matrix), intent(in) :: hecMAT
331    type (hecmwST_local_matrix), intent(out) :: BTtKT
332    integer :: nr, nc, ndof, ndof2, i, icnt, js, je, j, jj, ks, ke, k, kk
333    integer :: ls, le, l, ll, m, ms, me, mm
334    integer, allocatable :: iw(:)
335    real(kind=kreal), pointer :: Ttp(:), Kp(:), Tp(:), TtKTp(:)
336    ! real(kind=kreal) :: tsym_s, tsym_e, tnum_s, tnum_e
337
338    nr=BTtmat%nr
339    nc=BTmat%nc
340    ndof=BTtmat%ndof
341    ndof2=ndof*ndof
342
343    BTtKT%nr=nr
344    BTtKT%nc=nc
345    BTtKT%ndof=ndof
346    allocate(BTtKT%index(0:nr))
347
348    ! tsym_s = hecmw_wtime()
349
350    !$omp parallel default(none), &
351      !$omp&         private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m), &
352      !$omp&         shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT)
353    allocate(iw(nc))
354    !$omp do
355    do i=1,nr
356      icnt=0
357      js=BTtmat%index(i-1)+1
358      je=BTtmat%index(i)
359      do j=js,je
360        jj=BTtmat%item(j)
361        ! lower
362        ks=hecMAT%indexL(jj-1)+1
363        ke=hecMAT%indexL(jj)
364        do k=ks,ke
365          kk=hecMAT%itemL(k)
366          ls=BTmat%index(kk-1)+1
367          le=BTmat%index(kk)
368          ll1: do l=ls,le
369            ll=BTmat%item(l)
370            do m=1,icnt
371              if (iw(m)==ll) cycle ll1
372            enddo
373            icnt=icnt+1
374            iw(icnt)=ll
375            !if (i==1) write(0,*) 'l', icnt, jj, kk, ll
376          enddo ll1
377        enddo
378        ! diagonal
379        ls=BTmat%index(jj-1)+1
380        le=BTmat%index(jj)
381        ll2: do l=ls,le
382          ll=BTmat%item(l)
383          do m=1,icnt
384            if (iw(m)==ll) cycle ll2
385          enddo
386          icnt=icnt+1
387          iw(icnt)=ll
388          !if (i==1) write(0,*) 'd', icnt, jj, kk, ll
389        enddo ll2
390        ! upper
391        ks=hecMAT%indexU(jj-1)+1
392        ke=hecMAT%indexU(jj)
393        do k=ks,ke
394          kk=hecMAT%itemU(k)
395          ls=BTmat%index(kk-1)+1
396          le=BTmat%index(kk)
397          ll3: do l=ls,le
398            ll=BTmat%item(l)
399            do m=1,icnt
400              if (iw(m)==ll) cycle ll3
401            enddo
402            icnt=icnt+1
403            iw(icnt)=ll
404            !if (i==1) write(0,*) 'u', icnt, jj, kk, ll
405          enddo ll3
406        enddo
407      enddo
408      if (icnt == 0) icnt=1
409      !if (i==1) write(0,*) iw(1:icnt)
410      BTtKT%index(i)=icnt
411    enddo
412    !$omp end do
413    deallocate(iw)
414    !$omp end parallel
415
416    ! tsym_e = hecmw_wtime()
417    ! write(0,*) 'tsym:',tsym_e-tsym_s
418
419    BTtKT%index(0)=0
420    do i=1,nr
421      BTtKT%index(i)=BTtKT%index(i-1)+BTtKT%index(i)
422    enddo
423    !write(0,*) BTtKT%index(1:n)-BTtKT%index(0:n-1)
424
425    BTtKT%nnz=BTtKT%index(nr)
426    allocate(BTtKT%item(BTtKT%nnz))
427    allocate(BTtKT%A(BTtKT%nnz*ndof2))
428    BTtKT%item=0
429    BTtKT%A=0.d0
430
431    ! tnum_s = hecmw_wtime()
432
433    !$omp parallel default(none), &
434      !$omp&         private(i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m, &
435      !$omp&                 ms,me,mm,Ttp,Kp,Tp,TtKTp), &
436      !$omp&         shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT,ndof,ndof2)
437    !$omp do
438    do i=1,nr
439      icnt=0
440      ms=BTtKT%index(i-1)+1
441      !me=BTtKT%index(i)
442      js=BTtmat%index(i-1)+1
443      je=BTtmat%index(i)
444      do j=js,je
445        jj=BTtmat%item(j)
446        Ttp=>BTtmat%A((j-1)*ndof2+1:j*ndof2)
447        ! lower
448        ks=hecMAT%indexL(jj-1)+1
449        ke=hecMAT%indexL(jj)
450        do k=ks,ke
451          kk=hecMAT%itemL(k)
452          Kp=>hecMAT%AL((k-1)*ndof2+1:k*ndof2)
453          ls=BTmat%index(kk-1)+1
454          le=BTmat%index(kk)
455          do l=ls,le
456            ll=BTmat%item(l)
457            Tp=>BTmat%A((l-1)*ndof2+1:l*ndof2)
458            me=ms-1+icnt
459            mm=-1
460            do m=ms,me
461              if (BTtKT%item(m)==ll) mm=m
462            enddo
463            if (mm<0) then
464              icnt=icnt+1
465              mm=me+1
466              BTtKT%item(mm)=ll
467              !if (i==1) write(0,*) 'l', mm, jj, kk, ll
468            endif
469            TtKTp=>BTtKT%A((mm-1)*ndof2+1:mm*ndof2)
470            call blk_trimatmul_add(ndof, Ttp, Kp, Tp, TtKTp)
471          enddo
472        enddo
473        ! diagonal
474        Kp=>hecMAT%D((jj-1)*ndof2+1:jj*ndof2)
475        ls=BTmat%index(jj-1)+1
476        le=BTmat%index(jj)
477        do l=ls,le
478          ll=BTmat%item(l)
479          Tp=>BTmat%A((l-1)*ndof2+1:l*ndof2)
480          me=ms-1+icnt
481          mm=-1
482          do m=ms,me
483            if (BTtKT%item(m)==ll) mm=m
484          enddo
485          if (mm<0) then
486            icnt=icnt+1
487            mm=me+1
488            BTtKT%item(mm)=ll
489            !if (i==1) write(0,*) 'd', mm, jj, kk, ll
490          endif
491          TtKTp=>BTtKT%A((mm-1)*ndof2+1:mm*ndof2)
492          call blk_trimatmul_add(ndof, Ttp, Kp, Tp, TtKTp)
493        enddo
494        ! upper
495        ks=hecMAT%indexU(jj-1)+1
496        ke=hecMAT%indexU(jj)
497        do k=ks,ke
498          kk=hecMAT%itemU(k)
499          Kp=>hecMAT%AU((k-1)*ndof2+1:k*ndof2)
500          ls=BTmat%index(kk-1)+1
501          le=BTmat%index(kk)
502          do l=ls,le
503            ll=BTmat%item(l)
504            Tp=>BTmat%A((l-1)*ndof2+1:l*ndof2)
505            me=ms-1+icnt
506            mm=-1
507            do m=ms,me
508              if (BTtKT%item(m)==ll) mm=m
509            enddo
510            if (mm<0) then
511              icnt=icnt+1
512              mm=me+1
513              BTtKT%item(mm)=ll
514              !if (i==1) write(0,*) 'u', mm, jj, kk, ll
515            endif
516            TtKTp=>BTtKT%A((mm-1)*ndof2+1:mm*ndof2)
517            call blk_trimatmul_add(ndof, Ttp, Kp, Tp, TtKTp)
518          enddo
519        enddo
520      enddo
521      if (icnt == 0) then
522        icnt=1
523        BTtKT%item(ms)=i
524      endif
525      ! error check!
526      !write(0,*) BTtKT%item(ms:ms-1+icnt)
527      !write(0,*) BTtKT%index(i)-BTtKT%index(i-1), icnt
528      if (ms-1+icnt /= BTtKT%index(i)) stop 'ERROR: trimatmul'
529    enddo
530    !$omp end do
531    !$omp end parallel
532
533    ! tnum_e = hecmw_wtime()
534    ! write(0,*) 'tnum:',tnum_e-tnum_s
535  end subroutine trimatmul_TtKT
536
537  subroutine blk_trimatmul_add(ndof, A, B, C, ABC)
538    implicit none
539    integer, intent(in) :: ndof
540    real(kind=kreal), intent(in) :: A(:), B(:), C(:)
541    real(kind=kreal), intent(inout) :: ABC(:)
542    real(kind=kreal), allocatable :: AB(:)
543    integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
544
545    ndof2=ndof*ndof
546    allocate(AB(ndof2))
547    AB=0.d0
548
549    do i=1,ndof
550      i0=(i-1)*ndof
551      do j=1,ndof
552        ij=i0+j
553        j0=(j-1)*ndof
554        do k=1,ndof
555          ik=i0+k
556          jk=j0+k
557          AB(ik)=AB(ik)+A(ij)*B(jk)
558        enddo
559      enddo
560    enddo
561
562    do i=1,ndof
563      i0=(i-1)*ndof
564      do j=1,ndof
565        ij=i0+j
566        j0=(j-1)*ndof
567        do k=1,ndof
568          ik=i0+k
569          jk=j0+k
570          ABC(ik)=ABC(ik)+AB(ij)*C(jk)
571        enddo
572      enddo
573    enddo
574
575    deallocate(AB)
576  end subroutine blk_trimatmul_add
577
578  subroutine place_num_on_diag(BTtKT, iwS, num_lagrange, num)
579    implicit none
580    type (hecmwST_local_matrix), intent(inout) :: BTtKT
581    integer(kind=kint), intent(in) :: iwS(:)
582    integer(kind=kint), intent(in) :: num_lagrange
583    real(kind=kreal), intent(in) :: num
584    integer(kind=kint) :: ndof, ndof2, ilag, i, idof, js, je, j, jj
585    integer(kind=kint) :: nmissing, k, ks, ke
586    integer(kind=kint), allocatable :: missing(:), cnt(:)
587    integer(kind=kint), pointer :: index(:), item(:)
588    real(kind=kreal), pointer :: A(:)
589
590    ndof=BTtKT%ndof
591    ndof2=ndof*ndof
592
593    ! check if there are places
594    allocate(missing(num_lagrange))
595    nmissing = 0
596    outer1: do ilag=1,num_lagrange
597      i=(iwS(ilag)-1)/ndof+1
598      idof=mod(iwS(ilag)-1, ndof)+1
599      js=BTtKT%index(i-1)+1
600      je=BTtKT%index(i)
601      do j=js,je
602        jj=BTtKT%item(j)
603        if (jj==i) cycle outer1  ! found place
604      enddo
605      ! not found
606      do k=1,nmissing
607        if (missing(k) == i) cycle outer1 ! already marked as missing
608      enddo
609      nmissing = nmissing + 1
610      missing(nmissing) = i
611    enddo outer1
612
613    ! if not, reallocate
614    if (nmissing > 0) then
615      allocate(cnt(BTtKT%nr))
616      allocate(index(0:BTtKT%nr))
617      do i=1,BTtKT%nr
618        cnt(i) = BTtKT%index(i) - BTtKT%index(i-1)
619      enddo
620      do i=1,nmissing
621        cnt(missing(i)) = cnt(missing(i)) + 1
622      enddo
623      call make_index(BTtKT%nr, cnt, index)
624      allocate(item(BTtKT%nnz + nmissing))
625      allocate(A(ndof2 * (BTtKT%nnz + nmissing)))
626      do i=1,BTtKT%nr
627        ks=index(i-1)+1
628        js=BTtKT%index(i-1)+1
629        je=BTtKT%index(i)
630        item(ks:ks+(je-js))=BTtKT%item(js:je)
631        A(ndof2*(ks-1)+1:ndof2*(ks+(je-js)))=BTtKT%A(ndof2*(js-1)+1:ndof2*je)
632      enddo
633      do i=1,nmissing
634        ke=index(missing(i))
635        item(ke)=missing(i)
636        A(ndof2*(ke-1)+1:ndof2*ke)=0.d0
637      enddo
638      deallocate(BTtKT%index)
639      deallocate(BTtKT%item)
640      deallocate(BTtKT%A)
641      BTtKT%index => index
642      BTtKT%item => item
643      BTtKT%A => A
644      BTtKT%nnz = index(BTtKT%nr)
645      deallocate(cnt)
646    endif
647    deallocate(missing)
648
649    ! place num
650    outer: do ilag=1,num_lagrange
651      i=(iwS(ilag)-1)/ndof+1
652      idof=mod(iwS(ilag)-1, ndof)+1
653      js=BTtKT%index(i-1)+1
654      je=BTtKT%index(i)
655      do j=js,je
656        jj=BTtKT%item(j)
657        if (jj==i) then
658          !write(0,*) ilag, i, idof
659          BTtKT%A((j-1)*ndof2+(idof-1)*ndof+idof)=num
660          cycle outer
661        endif
662      enddo
663    enddo outer
664  end subroutine place_num_on_diag
665
666  subroutine replace_hecmat(hecMAT, BTtKT)
667    implicit none
668    type (hecmwST_matrix), intent(inout) :: hecMAT
669    type (hecmwST_local_matrix), intent(in) :: BTtKT
670    integer :: nr, nc, ndof, ndof2, i, nl, nu, js, je, j, jj
671    integer :: ksl, ksu, k
672
673    nr=BTtKT%nr
674    nc=BTtKT%nc
675    ndof=hecMAT%NDOF
676    ndof2=ndof*ndof
677
678    ! free old hecMAT
679    if (associated(hecMAT%AL)) deallocate(hecMAT%AL)
680    if (associated(hecMAT%AU)) deallocate(hecMAT%AU)
681    if (associated(hecMAT%itemL)) deallocate(hecMAT%itemL)
682    if (associated(hecMAT%itemU)) deallocate(hecMAT%itemU)
683    hecMAT%indexL=0
684    hecMAT%indexU=0
685
686    ! count NPL, NPU
687    !$omp parallel default(none),private(i,nl,nu,js,je,j,jj), &
688      !$omp&         shared(nr,BTtKT,hecMAT)
689    !$omp do
690    do i=1,nr
691      nl=0
692      nu=0
693      js=BTtKT%index(i-1)+1
694      je=BTtKT%index(i)
695      do j=js,je
696        jj=BTtKT%item(j)
697        if (jj < i) then
698          nl=nl+1
699        elseif (i < jj) then
700          nu=nu+1
701        else
702          ! diagonal
703        endif
704      enddo
705      hecMAT%indexL(i)=nl
706      hecMAT%indexU(i)=nu
707    enddo
708    !$omp end do
709    !$omp end parallel
710
711    hecMAT%indexL(0)=0
712    hecMAT%indexU(0)=0
713    do i=1,nc
714      hecMAT%indexL(i)=hecMAT%indexL(i-1)+hecMAT%indexL(i)
715      hecMAT%indexU(i)=hecMAT%indexU(i-1)+hecMAT%indexU(i)
716    enddo
717    hecMAT%NPL=hecMAT%indexL(nc)
718    hecMAT%NPU=hecMAT%indexU(nc)
719
720    ! allocate new hecMAT
721    allocate(hecMAT%itemL(hecMAT%NPL), hecMAT%itemU(hecMAT%NPU))
722    allocate(hecMAT%AL(hecMAT%NPL*ndof2), hecMAT%AU(hecMAT%NPU*ndof2))
723    hecMAT%itemL=0
724    hecMAT%itemU=0
725    hecMAT%D=0.d0
726    hecMAT%AL=0.d0
727    hecMAT%AU=0.d0
728
729    ! copy from BTtKT to hecMAT
730    !$omp parallel default(none),private(i,nl,nu,js,je,ksl,ksu,j,jj,k), &
731      !$omp&  shared(nr,BTtKT,hecMAT,ndof2)
732    !$omp do
733    do i=1,nr
734      nl=0
735      nu=0
736      js=BTtKT%index(i-1)+1
737      je=BTtKT%index(i)
738      ksl=hecMAT%indexL(i-1)+1
739      ksu=hecMAT%indexU(i-1)+1
740      do j=js,je
741        jj=BTtKT%item(j)
742        if (jj < i) then
743          k=ksl+nl
744          hecMAT%itemL(k)=jj
745          hecMAT%AL((k-1)*ndof2+1:k*ndof2)=BTtKT%A((j-1)*ndof2+1:j*ndof2)
746          nl=nl+1
747        elseif (i < jj) then
748          k=ksu+nu
749          hecMAT%itemU(k)=jj
750          hecMAT%AU((k-1)*ndof2+1:k*ndof2)=BTtKT%A((j-1)*ndof2+1:j*ndof2)
751          nu=nu+1
752        else
753          hecMAT%D((i-1)*ndof2+1:i*ndof2)=BTtKT%A((j-1)*ndof2+1:j*ndof2)
754        endif
755      enddo
756      ! if (ksl+nl /= hecMAT%indexL(i)+1) stop 'ERROR: indexL'
757      ! if (ksu+nu /= hecMAT%indexU(i)+1) stop 'ERROR: indexU'
758    enddo
759    !$omp end do
760    !$omp end parallel
761
762    ! do i=1,hecMAT%NPL
763    !   if (hecMAT%itemL(i) <= 0) stop 'ERROR: negative itemL'
764    !   if (hecMAT%itemL(i) > nc) stop 'ERROR: too big itemL'
765    ! enddo
766    ! do i=1,hecMAT%NPU
767    !   if (hecMAT%itemU(i) <= 0) stop 'ERROR: negative itemU'
768    !   if (hecMAT%itemU(i) > nc) stop 'ERROR: too big itemU'
769    ! enddo
770  end subroutine replace_hecmat
771
772  subroutine make_new_hecmat(hecMAT, BTtKT, hecTKT)
773    implicit none
774    type(hecmwST_matrix), intent(in) :: hecMAT
775    type(hecmwST_local_matrix), intent(in) :: BTtKT
776    type(hecmwST_matrix), intent(inout) :: hecTKT
777    integer(kind=kint) :: nr, nc, ndof, ndof2
778
779    nr=BTtKT%nr
780    nc=BTtKT%nc
781    ndof=BTtKT%ndof
782    ndof2=ndof*ndof
783
784    !write(0,*) 'DEBUG: nr, nc =',nr,nc
785
786    ! if (nr /= nc) then
787    !   stop 'ERROR: nr /= nc'
788    ! endif
789    hecTKT%N =hecMAT%N
790    hecTKT%NP=nc
791    hecTKT%NDOF=ndof
792
793    if (associated(hecTKT%D)) deallocate(hecTKT%D)
794    allocate(hecTKT%D(nc*ndof2))
795
796    if (associated(hecTKT%indexL)) deallocate(hecTKT%indexL)
797    if (associated(hecTKT%indexU)) deallocate(hecTKT%indexU)
798    allocate(hecTKT%indexL(0:nc))
799    allocate(hecTKT%indexU(0:nc))
800
801    hecTKT%Iarray=hecMAT%Iarray
802    hecTKT%Rarray=hecMAT%Rarray
803
804    call replace_hecmat(hecTKT, BTtKT)
805  end subroutine make_new_hecmat
806
807  subroutine hecmw_localmat_mulvec(BTmat, V, TV)
808    implicit none
809    type (hecmwST_local_matrix), intent(in) :: BTmat
810    real(kind=kreal), intent(in), target :: V(:)
811    real(kind=kreal), intent(out), target :: TV(:)
812    real(kind=kreal), pointer :: TVp(:), Tp(:), Vp(:)
813    integer :: nr, ndof, ndof2, i, js, je, j, jj, k, kl0, l
814    !!$    real(kind=kreal) :: vnorm
815
816    nr=BTmat%nr
817    ndof=BTmat%ndof
818    ndof2=ndof*ndof
819
820    TV=0.d0
821
822    !!$    vnorm=0.d0
823    !!$    do i=1,nr*ndof
824    !!$      vnorm=vnorm+V(i)**2
825    !!$    enddo
826    !!$    write(0,*) 'vnorm:', sqrt(vnorm)
827
828    !$omp parallel default(none),private(i,TVp,js,je,j,jj,Tp,Vp,k,kl0,l), &
829      !$omp&  shared(nr,TV,ndof,BTmat,ndof2,V)
830    !$omp do
831    do i=1,nr
832      TVp=>TV((i-1)*ndof+1:i*ndof)
833      js=BTmat%index(i-1)+1
834      je=BTmat%index(i)
835      do j=js,je
836        jj=BTmat%item(j)
837        Tp=>BTmat%A((j-1)*ndof2+1:j*ndof2)
838        Vp=>V((jj-1)*ndof+1:jj*ndof)
839        do k=1,ndof
840          kl0=(k-1)*ndof
841          do l=1,ndof
842            TVp(k)=TVp(k)+Tp(kl0+l)*Vp(l)
843          enddo
844        enddo
845      enddo
846    enddo
847    !$omp end do
848    !$omp end parallel
849  end subroutine hecmw_localmat_mulvec
850
851  subroutine hecmw_trimatmul_TtKT_mpc(hecMESH, hecMAT, hecTKT)
852    implicit none
853    type (hecmwST_local_mesh), intent(inout) :: hecMESH
854    type (hecmwST_matrix), intent(in) :: hecMAT
855    type (hecmwST_matrix), intent(inout) :: hecTKT
856    type (hecmwST_local_matrix) :: BTmat, BTtmat
857    integer(kind=kint), allocatable :: iwS(:)
858    integer(kind=kint) :: ndof, n_mpc, i_mpc
859    integer(kind=kint) :: i, j, k, kk, ilag
860    real(kind=kreal) :: t0, t1
861    t0 = hecmw_wtime()
862    ndof=hecMAT%NDOF
863    n_mpc=0
864    OUTER: do i=1,hecMESH%mpc%n_mpc
865      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
866        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER
867      enddo
868      n_mpc=n_mpc+1
869    enddo OUTER
870    allocate(iwS(n_mpc))
871    i_mpc=0
872    OUTER2: do i=1,hecMESH%mpc%n_mpc
873      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
874        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER2
875      enddo
876      i_mpc=i_mpc+1
877      k=hecMESH%mpc%mpc_index(i-1)+1
878      kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k)
879      iwS(i_mpc)=kk
880    enddo OUTER2
881    if (DEBUG >= 2) then
882      write(700+hecmw_comm_get_rank(),*) 'DEBUG: n_mpc, slaves',n_mpc,iwS(1:n_mpc)
883    endif
884    t1 = hecmw_wtime()
885    if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (1) : ",t1-t0
886    t0 = hecmw_wtime()
887    call make_BTmat_mpc(hecMESH, ndof, BTmat)
888    if (DEBUG >= 3) then
889      write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTmat(MPC)'
890      if (DEBUG == 3) then
891        call hecmw_localmat_write_ij(BTmat,700+hecmw_comm_get_rank())
892      else
893        call hecmw_localmat_write(BTmat,700+hecmw_comm_get_rank())
894      endif
895    endif
896    t1 = hecmw_wtime()
897    if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (2) : ",t1-t0
898    t0 = hecmw_wtime()
899    ! call make_BTtmat_mpc(hecMESH, ndof, BTtmat)
900    call hecmw_localmat_transpose(BTmat, BTtmat)
901    ! if (hecmw_localmat_equal(BTtmat, BTtmat2) == 0) then
902    !   write(0,*) 'ERROR: BTtmat2 is incorrect!!!'
903    ! else
904    !   write(0,*) 'DEBUG: BTtmat2 is correct'
905    ! endif
906    if (DEBUG >= 3) then
907      write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtmat(MPC)'
908      if (DEBUG == 3) then
909        call hecmw_localmat_write_ij(BTtmat,700+hecmw_comm_get_rank())
910      else
911        call hecmw_localmat_write(BTtmat,700+hecmw_comm_get_rank())
912      endif
913    endif
914    t1 = hecmw_wtime()
915    if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (3) : ",t1-t0
916    t0 = hecmw_wtime()
917    call hecmw_trimatmul_TtKT(hecMESH, BTtmat, hecMAT, BTmat, iwS, n_mpc, hecTKT)
918    t1 = hecmw_wtime()
919    if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (4) : ",t1-t0
920    t0 = hecmw_wtime()
921
922    if (associated(hecTKT%B)) deallocate(hecTKT%B)
923    if (associated(hecTKT%X)) deallocate(hecTKT%X)
924    allocate(hecTKT%B(ndof*hecTKT%NP))
925    allocate(hecTKT%X(ndof*hecTKT%NP))
926    do i=1, size(hecMAT%B)
927      hecTKT%B(i) = hecMAT%B(i)
928    enddo
929    do i=1, size(hecMAT%X)
930      hecTKT%X(i) = hecMAT%X(i)
931    enddo
932    do ilag=1,n_mpc
933      hecTKT%X(iwS(ilag)) = 0.d0
934    enddo
935
936    call hecmw_localmat_free(BTmat)
937    call hecmw_localmat_free(BTtmat)
938    ! call hecmw_localmat_free(BTtmat2)
939    deallocate(iwS)
940    t1 = hecmw_wtime()
941    if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (5) : ",t1-t0
942  end subroutine hecmw_trimatmul_TtKT_mpc
943
944  subroutine make_BTmat_mpc(hecMESH, ndof, BTmat)
945    implicit none
946    type (hecmwST_local_mesh), intent(in) :: hecMESH
947    integer(kind=kint), intent(in) :: ndof
948    type (hecmwST_local_matrix), intent(out) :: BTmat
949    type (hecmwST_local_matrix) :: Tmat
950    integer(kind=kint) :: n_mpc
951    integer(kind=kint) :: i,j,k,js,jj,kk
952    n_mpc=0
953    OUTER: do i=1,hecMESH%mpc%n_mpc
954      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
955        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER
956      enddo
957      n_mpc=n_mpc+1
958    enddo OUTER
959    Tmat%nr=hecMESH%n_node*ndof
960    Tmat%nc=Tmat%nr
961    Tmat%ndof=1
962    allocate(Tmat%index(0:Tmat%nr))
963    ! count nonzero in each row
964    Tmat%index(1:Tmat%nr)=1
965    OUTER2: do i=1,hecMESH%mpc%n_mpc
966      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
967        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER2
968      enddo
969      k=hecMESH%mpc%mpc_index(i-1)+1
970      kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k)
971      Tmat%index(kk)=hecMESH%mpc%mpc_index(i)-hecMESH%mpc%mpc_index(i-1)-1
972    enddo OUTER2
973    ! index
974    Tmat%index(0)=0
975    do i=1,Tmat%nr
976      Tmat%index(i)=Tmat%index(i-1)+Tmat%index(i)
977    enddo
978    Tmat%nnz=Tmat%index(Tmat%nr)
979    allocate(Tmat%item(Tmat%nnz), Tmat%A(Tmat%nnz))
980    ! diag
981    do i=1,Tmat%nr
982      js=Tmat%index(i-1)+1
983      Tmat%item(js)=i
984      Tmat%A(js)=1.d0
985    enddo
986    ! others
987    OUTER3: do i=1,hecMESH%mpc%n_mpc
988      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
989        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER3
990      enddo
991      k=hecMESH%mpc%mpc_index(i-1)+1
992      kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k)
993      js=Tmat%index(kk-1)+1
994      do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
995        jj = ndof * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
996        Tmat%item(js)=jj
997        Tmat%A(js)=-hecMESH%mpc%mpc_val(j)
998        js=js+1
999      enddo
1000    enddo OUTER3
1001    !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Tmat(MPC)'
1002    !call hecmw_localmat_write(Tmat,700+hecmw_comm_get_rank())
1003    call hecmw_localmat_blocking(Tmat, ndof, BTmat)
1004    call hecmw_localmat_free(Tmat)
1005  end subroutine make_BTmat_mpc
1006
1007  subroutine make_BTtmat_mpc(hecMESH, ndof, BTtmat)
1008    implicit none
1009    type (hecmwST_local_mesh), intent(in) :: hecMESH
1010    integer(kind=kint), intent(in) :: ndof
1011    type (hecmwST_local_matrix), intent(out) :: BTtmat
1012    type (hecmwST_local_matrix) :: Ttmat
1013    integer(kind=kint) :: n_mpc
1014    integer(kind=kint) :: i,j,k,js,je,jj,kk
1015    integer(kind=kint), allocatable :: iw(:)
1016    n_mpc=0
1017    OUTER: do i=1,hecMESH%mpc%n_mpc
1018      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
1019        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER
1020      enddo
1021      n_mpc=n_mpc+1
1022    enddo OUTER
1023    Ttmat%nr=hecMESH%n_node*ndof
1024    Ttmat%nc=Ttmat%nr
1025    Ttmat%ndof=1
1026    allocate(Ttmat%index(0:Ttmat%nr))
1027    ! count nonzero in each row
1028    Ttmat%index(1:Ttmat%nr)=1
1029    OUTER2: do i=1,hecMESH%mpc%n_mpc
1030      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
1031        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER2
1032      enddo
1033      k=hecMESH%mpc%mpc_index(i-1)+1
1034      kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k)
1035      Ttmat%index(kk)=0
1036      do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
1037        jj = ndof * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
1038        Ttmat%index(jj)=Ttmat%index(jj)+1
1039      enddo
1040    enddo OUTER2
1041    ! index
1042    Ttmat%index(0)=0
1043    do i=1,Ttmat%nr
1044      Ttmat%index(i)=Ttmat%index(i-1)+Ttmat%index(i)
1045    enddo
1046    Ttmat%nnz=Ttmat%index(Ttmat%nr)
1047    allocate(Ttmat%item(Ttmat%nnz), Ttmat%A(Ttmat%nnz))
1048    ! diag
1049    do i=1,Ttmat%nr
1050      js=Ttmat%index(i-1)+1
1051      je=Ttmat%index(i)
1052      if (js <= je) then
1053        Ttmat%item(js)=i
1054        Ttmat%A(js)=1.d0
1055      endif
1056    enddo
1057    ! others
1058    allocate(iw(Ttmat%nr))
1059    iw(:)=1
1060    OUTER3: do i=1,hecMESH%mpc%n_mpc
1061      do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i)
1062        if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER3
1063      enddo
1064      k=hecMESH%mpc%mpc_index(i-1)+1
1065      kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k)
1066      do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
1067        jj = ndof * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
1068        js=Ttmat%index(jj-1)+1+iw(jj)
1069        Ttmat%item(js)=kk
1070        Ttmat%A(js)=-hecMESH%mpc%mpc_val(j)
1071        iw(jj)=iw(jj)+1
1072      enddo
1073    enddo OUTER3
1074    deallocate(iw)
1075    !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Ttmat(MPC)'
1076    !call hecmw_localmat_write(Ttmat,700+hecmw_comm_get_rank())
1077    call hecmw_localmat_blocking(Ttmat, ndof, BTtmat)
1078    call hecmw_localmat_free(Ttmat)
1079  end subroutine make_BTtmat_mpc
1080
1081  subroutine hecmw_localmat_transpose(Tmat, Ttmat)
1082    implicit none
1083    type (hecmwST_local_matrix), intent(in) :: Tmat
1084    type (hecmwST_local_matrix), intent(out) :: Ttmat
1085    integer(kind=kint), allocatable :: iw(:)
1086    integer(kind=kint) :: i, j, jj, ndof, ndof2, k, idof, jdof
1087    allocate(iw(Tmat%nc))
1088    iw = 0
1089    do i = 1, Tmat%nr
1090      do j = Tmat%index(i-1)+1, Tmat%index(i)
1091        jj = Tmat%item(j)
1092        iw(jj) = iw(jj) + 1
1093      enddo
1094    enddo
1095    Ttmat%nr = Tmat%nc
1096    Ttmat%nc = Tmat%nr
1097    Ttmat%nnz = Tmat%nnz
1098    Ttmat%ndof = Tmat%ndof
1099    ndof = Tmat%ndof
1100    ndof2 = ndof * ndof
1101    allocate(Ttmat%index(0:Ttmat%nr))
1102    allocate(Ttmat%item(Ttmat%nnz))
1103    allocate(Ttmat%A(Ttmat%nnz*ndof2))
1104    Ttmat%index(0) = 0
1105    do i = 1, Ttmat%nr
1106      Ttmat%index(i) = Ttmat%index(i-1) + iw(i)
1107      iw(i) = Ttmat%index(i-1) + 1
1108    enddo
1109    do i = 1, Tmat%nr
1110      do j = Tmat%index(i-1)+1, Tmat%index(i)
1111        jj = Tmat%item(j)
1112        k = iw(jj)
1113        Ttmat%item( k ) = i
1114        do idof = 1, ndof
1115          do jdof = 1, ndof
1116            Ttmat%A((k-1)*ndof2+(idof-1)*ndof+jdof) = &
1117              Tmat%A((j-1)*ndof2+(jdof-1)*ndof+idof)
1118          enddo
1119        enddo
1120        iw(jj) = k + 1
1121      enddo
1122    enddo
1123  end subroutine hecmw_localmat_transpose
1124
1125  function hecmw_localmat_equal(Tmat1, Tmat2)
1126    implicit none
1127    type (hecmwST_local_matrix), intent(in) :: Tmat1, Tmat2
1128    integer(kind=kint) :: hecmw_localmat_equal
1129    integer(kind=kint) :: i, j, k0, k, ndof, ndof2
1130    hecmw_localmat_equal = 0
1131    if (Tmat1%nr /= Tmat2%nr) return
1132    if (Tmat1%nc /= Tmat2%nc) return
1133    if (Tmat1%nnz /= Tmat2%nnz) return
1134    if (Tmat1%ndof /= Tmat2%ndof) return
1135    ndof = Tmat1%ndof
1136    ndof2 = ndof * ndof
1137    do i = 1, Tmat1%nr
1138      if (Tmat1%index(i) /= Tmat2%index(i)) return
1139      do j = Tmat1%index(i-1)+1, Tmat1%index(i)
1140        if (Tmat1%item(j) /= Tmat2%item(j)) return
1141        k0 = (j-1)*ndof2
1142        do k = 1, ndof2
1143          if (Tmat1%A(k0+k) /= Tmat2%A(k0+k)) return
1144        enddo
1145      enddo
1146    enddo
1147    hecmw_localmat_equal = 1
1148  end function hecmw_localmat_equal
1149
1150!!!
1151!!! Subroutines for parallel contact analysis with iterative linear solver
1152!!!
1153
1154  subroutine hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew)
1155    implicit none
1156    type (hecmwST_local_matrix), intent(inout) :: BTmat
1157    type (hecmwST_local_mesh), intent(in) :: hecMESH
1158    type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1159    integer(kind=kint) :: nn_int, np, ndof, ndof2, nr_ext, nnz_ext
1160    integer(kind=kint), allocatable :: exp_rows_index(:), exp_cols_index(:)
1161    integer(kind=kint), allocatable :: exp_rows_item(:,:), exp_cols_item(:,:)
1162    type (hecmwST_local_matrix), allocatable :: BT_ext(:)
1163    type (hecmwST_local_matrix) :: BT_int
1164    type (hecmwST_local_matrix) :: BTnew
1165    ! some checks
1166    if (DEBUG >= 1) write(0,*) 'DEBUG: nr,nc,nnz,ndof',BTmat%nr,BTmat%nc,BTmat%nnz,BTmat%ndof
1167    if (BTmat%nr /= hecMESH%n_node) stop 'ERROR: invalid size in hecmw_localmat_assemble'
1168    !
1169    nn_int = hecMESH%nn_internal
1170    np = hecMESH%n_node
1171    ndof = BTmat%ndof
1172    ndof2 = ndof*ndof
1173    !
1174    nr_ext = np - nn_int
1175    nnz_ext = BTmat%index(np) - BTmat%index(nn_int)
1176    !
1177    call prepare_BT_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext)
1178    if (DEBUG >= 1) write(0,*) 'DEBUG: prepare_BT_ext done'
1179    !
1180    call prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1181    if (DEBUG >= 1) write(0,*) 'DEBUG: prepare_column info done'
1182    !
1183    call send_BT_ext_and_recv_BT_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, &
1184         exp_cols_index, exp_cols_item, BT_int, hecMESHnew)
1185    if (DEBUG >= 1) write(0,*) 'DEBUG: send BT_ext and recv BT_int done'
1186    !
1187    !write(0,*) 'BTmat%ndof,BT_int%ndof',BTmat%ndof,BT_int%ndof
1188    call hecmw_localmat_add(BTmat, BT_int, BTnew)
1189    if (DEBUG >= 1) write(0,*) 'DEBUG: localmat_add done'
1190    !
1191    call hecmw_localmat_free(BTmat)
1192    call hecmw_localmat_free(BT_int)
1193    !
1194    BTmat%nr = BTnew%nr
1195    BTmat%nc = BTnew%nc
1196    BTmat%nnz = BTnew%nnz
1197    BTmat%ndof = BTnew%ndof
1198    BTmat%index => BTnew%index
1199    BTmat%item => BTnew%item
1200    BTmat%A => BTnew%A
1201    !
1202    ! hecMESH%n_node = hecMESHnew%n_node
1203    ! hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe
1204    ! deallocate(hecMESH%neighbor_pe)
1205    ! deallocate(hecMESH%import_index)
1206    ! deallocate(hecMESH%export_index)
1207    ! deallocate(hecMESH%import_item)
1208    ! deallocate(hecMESH%export_item)
1209    ! deallocate(hecMESH%node_ID)
1210    ! deallocate(hecMESH%global_node_ID)
1211    ! hecMESH%neighbor_pe => hecMESHnew%neighbor_pe
1212    ! hecMESH%import_index => hecMESHnew%import_index
1213    ! hecMESH%export_index => hecMESHnew%export_index
1214    ! hecMESH%import_item => hecMESHnew%import_item
1215    ! hecMESH%export_item => hecMESHnew%export_item
1216    ! hecMESH%node_ID => hecMESHnew%node_ID
1217    ! hecMESH%global_node_ID => hecMESHnew%global_node_ID
1218    !
1219    if (DEBUG >= 1) write(0,*) 'DEBUG: update BTmat and hecMESH done'
1220  end subroutine hecmw_localmat_assemble
1221
1222  subroutine prepare_BT_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext)
1223    implicit none
1224    type (hecmwST_local_matrix), intent(in) :: BTmat
1225    type (hecmwST_local_mesh), intent(in) :: hecMESH
1226    integer(kind=kint), allocatable, intent(out) :: exp_rows_index(:)
1227    integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1228    type (hecmwST_local_matrix), allocatable, intent(out) :: BT_ext(:)
1229    integer(kind=kint), allocatable :: incl_nz(:), exp_cols_per_row(:), exp_rows_per_rank(:)
1230    integer(kind=kint) :: nn_int
1231    nn_int = hecMESH%nn_internal
1232    !
1233    call check_external_nz_blocks(BTmat, nn_int, incl_nz)
1234    !
1235    call count_ext_rows_with_nz(BTmat, nn_int, incl_nz, exp_cols_per_row)
1236    !
1237    call count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank)
1238    !
1239    allocate(exp_rows_index(0:hecMESH%n_neighbor_pe))
1240    call make_index(hecMESH%n_neighbor_pe, exp_rows_per_rank, exp_rows_index)
1241    !write(0,*) 'exp_rows_index',exp_rows_index(:)
1242    !
1243    deallocate(exp_rows_per_rank)
1244    !
1245    call make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item)
1246    !
1247    deallocate(exp_cols_per_row)
1248    !
1249    allocate(BT_ext(hecMESH%n_neighbor_pe))
1250    call extract_BT_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext)
1251    !
1252    deallocate(incl_nz)
1253  end subroutine prepare_BT_ext
1254
1255  subroutine check_external_nz_blocks(BTmat, nn_internal, incl_nz)
1256    implicit none
1257    type (hecmwST_local_matrix), intent(in) :: BTmat
1258    integer(kind=kint), intent(in) :: nn_internal
1259    integer(kind=kint), allocatable, intent(out) :: incl_nz(:)
1260    integer(kind=kint) :: ndof2, i0, nnz_ext, i, k, nnz_blk
1261    if (nn_internal > BTmat%nr) stop 'ERROR: invalid nn_internal'
1262    ndof2 = BTmat%ndof ** 2
1263    i0 = BTmat%index(nn_internal)
1264    nnz_ext = BTmat%index(BTmat%nr) - i0
1265    allocate(incl_nz(nnz_ext))
1266    nnz_blk = 0
1267    do i = 1, nnz_ext
1268      incl_nz(i) = 0
1269      do k = 1, ndof2
1270        if (BTmat%A(ndof2*(i0+i-1)+k) /= 0.0d0) then
1271          incl_nz(i) = 1
1272          nnz_blk = nnz_blk + 1
1273          exit
1274        endif
1275      enddo
1276    enddo
1277    if (DEBUG >= 1) write(0,*) 'DEBUG: nnz_blk',nnz_blk
1278  end subroutine check_external_nz_blocks
1279
1280  subroutine count_ext_rows_with_nz(BTmat, nn_internal, incl_nz, exp_cols_per_row)
1281    implicit none
1282    type (hecmwST_local_matrix), intent(in) :: BTmat
1283    integer(kind=kint), intent(in) :: nn_internal
1284    integer(kind=kint), intent(in) :: incl_nz(:)
1285    integer(kind=kint), allocatable, intent(out) :: exp_cols_per_row(:)
1286    integer(kind=kint) :: nr_ext, nnz_int, i, irow, js, je, j, jcol
1287    nr_ext = BTmat%nr - nn_internal
1288    nnz_int = BTmat%index(nn_internal)
1289    allocate(exp_cols_per_row(nr_ext))
1290    exp_cols_per_row(:) = 0
1291    do i = 1, nr_ext
1292      irow = nn_internal+i
1293      js = BTmat%index(irow-1)+1
1294      je = BTmat%index(irow)
1295      do j = js, je
1296        jcol = BTmat%item(j)
1297        if (incl_nz(j-nnz_int) == 1) exp_cols_per_row(i) = exp_cols_per_row(i) + 1
1298      enddo
1299    enddo
1300    !write(0,*) 'exp_cols_per_row',exp_cols_per_row(:)
1301  end subroutine count_ext_rows_with_nz
1302
1303  subroutine count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank)
1304    implicit none
1305    type (hecmwST_local_mesh), intent(in) :: hecMESH
1306    integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1307    integer(kind=kint), allocatable, intent(out) :: exp_rows_per_rank(:)
1308    integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom
1309    allocate(exp_rows_per_rank(hecMESH%n_neighbor_pe))
1310    exp_rows_per_rank(1:hecMESH%n_neighbor_pe) = 0
1311    nn_int = hecMESH%nn_internal
1312    np = hecMESH%n_node
1313    nr_ext = np - nn_int
1314    do i = 1, nr_ext
1315      if (exp_cols_per_row(i) > 0) then
1316        irow = nn_int + i
1317        exp_rank = hecMESH%node_ID(2*irow)
1318        call rank_to_idom(hecMESH, exp_rank, idom)
1319        exp_rows_per_rank(idom) = exp_rows_per_rank(idom) + 1
1320      endif
1321    enddo
1322    !write(0,*) 'exp_rows_per_rank',exp_rows_per_rank(:)
1323  end subroutine count_exp_rows_per_rank
1324
1325  subroutine rank_to_idom(hecMESH, rank, idom)
1326    implicit none
1327    type (hecmwST_local_mesh), intent(in) :: hecMESH
1328    integer(kind=kint), intent(in) :: rank
1329    integer(kind=kint), intent(out) :: idom
1330    integer(kind=kint) :: i
1331    do i = 1, hecMESH%n_neighbor_pe
1332      if (hecMESH%neighbor_pe(i) == rank) then
1333        idom = i
1334        return
1335      endif
1336    enddo
1337    stop 'ERROR: exp_rank not found in neighbor_pe'
1338  end subroutine rank_to_idom
1339
1340  subroutine make_index(len, cnt, index)
1341    implicit none
1342    integer(kind=kint), intent(in) :: len
1343    integer(kind=kint), intent(in) :: cnt(len)
1344    integer(kind=kint), intent(out) :: index(0:)
1345    integer(kind=kint) :: i
1346    ! write(0,*) 'make_index: len',len
1347    index(0) = 0
1348    do i = 1,len
1349      index(i) = index(i-1) + cnt(i)
1350    enddo
1351  end subroutine make_index
1352
1353  subroutine make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item)
1354    implicit none
1355    type (hecmwST_local_mesh), intent(in) :: hecMESH
1356    integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1357    integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1358    integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1359    integer(kind=kint), allocatable :: cnt(:)
1360    integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom, idx
1361    allocate(exp_rows_item(2,exp_rows_index(hecMESH%n_neighbor_pe)))
1362    allocate(cnt(hecMESH%n_neighbor_pe))
1363    cnt(:) = 0
1364    nn_int = hecMESH%nn_internal
1365    np = hecMESH%n_node
1366    nr_ext = np - nn_int
1367    do i = 1, nr_ext
1368      if (exp_cols_per_row(i) > 0) then
1369        irow = nn_int + i
1370        exp_rank = hecMESH%node_ID(2*irow)
1371        call rank_to_idom(hecMESH, exp_rank, idom)
1372        cnt(idom) = cnt(idom) + 1
1373        idx = exp_rows_index(idom-1) + cnt(idom)
1374        exp_rows_item(1,idx) = irow
1375        exp_rows_item(2,idx) = exp_cols_per_row(i)
1376      endif
1377    enddo
1378    !write(0,*) 'cnt',cnt(:)
1379    do idom = 1, hecMESH%n_neighbor_pe
1380      if (cnt(idom) /= exp_rows_index(idom)-exp_rows_index(idom-1)) stop 'ERROR: make exp_rows_item'
1381    enddo
1382    !write(0,*) 'exp_rows_item(1,:)',exp_rows_item(1,:)
1383    !write(0,*) 'exp_rows_item(2,:)',exp_rows_item(2,:)
1384  end subroutine make_exp_rows_item
1385
1386  subroutine extract_BT_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext)
1387    implicit none
1388    type (hecmwST_local_mesh), intent(in) :: hecMESH
1389    type (hecmwST_local_matrix), intent(in) :: BTmat
1390    integer(kind=kint), intent(in) :: incl_nz(:)
1391    integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1392    integer(kind=kint), intent(in) :: exp_rows_item(:,:)
1393    type (hecmwST_local_matrix), allocatable, intent(out) :: BT_ext(:)
1394    integer(kind=kint) :: ndof, ndof2, nn_int, nnz_int, idom, j, idx, ncol, cnt, jrow, ks, ke, k, kcol
1395    allocate(BT_ext(hecMESH%n_neighbor_pe))
1396    ndof = BTmat%ndof
1397    ndof2 = ndof * ndof
1398    nn_int = hecMESH%nn_internal
1399    nnz_int = BTmat%index(nn_int)
1400    do idom = 1, hecMESH%n_neighbor_pe
1401      BT_ext(idom)%nr = exp_rows_index(idom) - exp_rows_index(idom-1)
1402      BT_ext(idom)%nc = BTmat%nc
1403      BT_ext(idom)%nnz = 0
1404      BT_ext(idom)%ndof = ndof
1405      allocate(BT_ext(idom)%index(0:BT_ext(idom)%nr))
1406      BT_ext(idom)%index(0) = 0
1407      do j = 1, BT_ext(idom)%nr
1408        idx = exp_rows_index(idom-1) + j
1409        ncol = exp_rows_item(2,idx)
1410        BT_ext(idom)%index(j) = BT_ext(idom)%index(j-1) + ncol
1411      enddo
1412      BT_ext(idom)%nnz = BT_ext(idom)%index(BT_ext(idom)%nr)
1413      if (DEBUG >= 1) write(0,*) 'DEBUG: idom,nr,nc,nnz,ndof', &
1414           idom,BT_ext(idom)%nr,BT_ext(idom)%nc,BT_ext(idom)%nnz,BT_ext(idom)%ndof
1415      allocate(BT_ext(idom)%item(BT_ext(idom)%nnz))
1416      allocate(BT_ext(idom)%A(BT_ext(idom)%nnz * ndof2))
1417      cnt = 0
1418      do j = 1, BT_ext(idom)%nr
1419        idx = exp_rows_index(idom-1) + j
1420        jrow = exp_rows_item(1,idx)
1421        if (jrow < 1 .or. BTmat%nr < jrow) stop 'ERROR: extract BT_ext: jrow'
1422        ks = BTmat%index(jrow-1)+1
1423        ke = BTmat%index(jrow)
1424        do k = ks, ke
1425          kcol = BTmat%item(k)
1426          if (incl_nz(k-nnz_int) == 0) cycle
1427          cnt = cnt + 1
1428          BT_ext(idom)%item(cnt) = kcol
1429          BT_ext(idom)%A(ndof2*(cnt-1)+1:ndof2*cnt) = BTmat%A(ndof2*(k-1)+1:ndof2*k)
1430        enddo
1431        if (cnt /= BT_ext(idom)%index(j)) stop 'ERROR: extract BT_ext'
1432      enddo
1433      ! if (DEBUG >= 3) write(100+hecMESH%my_rank,*) 'BT_ext(',idom,')'
1434      ! call hecmw_localmat_write(BT_ext(idom), 100+hecMESH%my_rank)
1435    enddo
1436  end subroutine extract_BT_ext
1437
1438  subroutine prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1439    implicit none
1440    type (hecmwST_local_mesh), intent(in) :: hecMESH
1441    type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1442    integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1443    integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1444    !
1445    call make_exp_cols_index(hecMESH%n_neighbor_pe, BT_ext, exp_cols_index)
1446    if (DEBUG >= 2) write(0,*) '  DEBUG2: make exp_cols_index done'
1447    if (DEBUG >= 3) write(0,*) '    DEBUG3: exp_cols_index', exp_cols_index(0:hecMESH%n_neighbor_pe)
1448    !
1449    ! (col ID, rank, global ID)
1450    !
1451    call make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1452    if (DEBUG >= 2) write(0,*) '  DEBUG2: make exp_cols_item done'
1453    ! if (DEBUG >= 3) write(0,*) '    DEBUG3: exp_cols_item', exp_cols_item(1:cNCOL_ITEM,1:exp_cols_index(hecMESH%n_neighbor_pe))
1454  end subroutine prepare_column_info
1455
1456  subroutine make_exp_cols_index(nnb, BT_ext, exp_cols_index)
1457    implicit none
1458    integer(kind=kint), intent(in) :: nnb
1459    type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1460    integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1461    integer(kind=kint) :: idom
1462    allocate(exp_cols_index(0:nnb))
1463    exp_cols_index(0) = 0
1464    do idom = 1, nnb
1465      exp_cols_index(idom) = exp_cols_index(idom-1) + BT_ext(idom)%nnz
1466    enddo
1467  end subroutine make_exp_cols_index
1468
1469  subroutine make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1470    implicit none
1471    type (hecmwST_local_mesh), intent(in) :: hecMESH
1472    type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1473    integer(kind=kint), allocatable, intent(in) :: exp_cols_index(:)
1474    integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1475    integer(kind=kint) :: cnt, idom, j, jcol
1476    allocate(exp_cols_item(cNCOL_ITEM,exp_cols_index(hecMESH%n_neighbor_pe)))
1477    cnt = 0
1478    do idom = 1, hecMESH%n_neighbor_pe
1479      do j = 1, BT_ext(idom)%nnz
1480        cnt = cnt + 1
1481        jcol = BT_ext(idom)%item(j)
1482        ! if (DEBUG >= 3) write(0,*) '    DEBUG3: idom,j,cnt,jcol,nn_internal,n_node',&
1483        !      idom,j,cnt,jcol,hecMESH%nn_internal,hecMESH%n_node
1484        ! if (DEBUG >= 3) write(0,*) '    DEBUG3: size of exp_cols_item',size(exp_cols_item)
1485        ! if (DEBUG >= 3) write(0,*) '    DEBUG3: size of node_ID',size(hecMESH%node_ID)
1486        ! if (DEBUG >= 3) write(0,*) '    DEBUG3: size of global_node_ID',size(hecMESH%global_node_ID)
1487        exp_cols_item(cLID,cnt) = hecMESH%node_ID(2*jcol-1)
1488        exp_cols_item(cRANK,cnt) = hecMESH%node_ID(2*jcol)
1489        if (cNCOL_ITEM >= 3) exp_cols_item(cGID,cnt) = hecMESH%global_node_ID(jcol)
1490        ! if (DEBUG >= 3) write(0,*) '    DEBUG3: lid,rank(,gid)',exp_cols_item(1:cNCOL_ITEM,cnt)
1491      enddo
1492      if (cnt /= exp_cols_index(idom)) stop 'ERROR: make exp_cols_item'
1493    enddo
1494  end subroutine make_exp_cols_item
1495
1496  subroutine send_BT_ext_and_recv_BT_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, &
1497       exp_cols_index, exp_cols_item, BT_int, hecMESHnew)
1498    implicit none
1499    type (hecmwST_local_mesh), intent(in) :: hecMESH
1500    integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1501    integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1502    type (hecmwST_local_matrix), allocatable, intent(inout) :: BT_ext(:)
1503    type (hecmwST_local_matrix), intent(out) :: BT_int
1504    type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1505    integer(kind=kint), allocatable :: imp_rows_index(:), imp_cols_index(:)
1506    integer(kind=kint), allocatable :: imp_rows_item(:,:), imp_cols_item(:,:)
1507    real(kind=kreal), allocatable :: imp_vals_item(:)
1508    integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
1509    integer(kind=kint) :: ndof, ndof2, idom, n_add_node, i0
1510    if (hecMESH%n_neighbor_pe == 0) return
1511    ndof = BT_ext(1)%ndof
1512    ndof2 = ndof*ndof
1513    !
1514    call convert_rowID_to_remote_localID(hecMESH, exp_rows_index(hecMESH%n_neighbor_pe), exp_rows_item)
1515    if (DEBUG >= 2) write(0,*) '  DEBUG2: convert rowID to remote localID done'
1516    !
1517    call send_recv_BT_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index)
1518    if (DEBUG >= 2) write(0,*) '  DEBUG2: send recv BT_ext nr and nnz done'
1519    !
1520    call send_recv_BT_ext_contents(hecMESH, BT_ext, &
1521         exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1522         imp_rows_index, imp_cols_index, &
1523         imp_rows_item, imp_cols_item, imp_vals_item)
1524    if (DEBUG >= 2) write(0,*) '  DEBUG2: send recv BT_ext contents done'
1525    !
1526    do idom = 1, hecMESH%n_neighbor_pe
1527      call hecmw_localmat_free(BT_ext(idom))
1528    enddo
1529    deallocate(BT_ext)
1530    !
1531    call allocate_BT_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
1532    if (DEBUG >= 2) write(0,*) '  DEBUG2: allocate BT_int done'
1533    !
1534    ! call copy_mesh(hecMESH, hecMESHnew)
1535    ! if (DEBUG >= 2) write(0,*) '  DEBUG2: copy mesh done'
1536    !
1537    call map_imported_cols(hecMESHnew, imp_cols_index(hecMESH%n_neighbor_pe), &
1538         imp_cols_item, n_add_node, add_nodes, map, i0)
1539    if (DEBUG >= 2) write(0,*) '  DEBUG2: map imported cols done'
1540    !
1541    call update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
1542    if (DEBUG >= 2) write(0,*) '  DEBUG2: update comm_table done'
1543    !
1544    BT_int%nc = hecMESHnew%n_node
1545    !
1546    call copy_vals_to_BT_int(hecMESH%n_neighbor_pe, imp_rows_index, imp_cols_index, &
1547         imp_rows_item, map, ndof2, imp_vals_item, BT_int)
1548    if (DEBUG >= 2) write(0,*) '  DEBUG2: copy vals to BT_int done'
1549    !
1550    deallocate(imp_rows_index)
1551    deallocate(imp_cols_index)
1552    deallocate(imp_rows_item)
1553    deallocate(imp_cols_item)
1554    deallocate(imp_vals_item)
1555    deallocate(map)
1556    !
1557    call sort_and_uniq_rows(BT_int)
1558    if (DEBUG >= 2) write(0,*) '  DEBUG2: sort and uniq rows of BT_int done'
1559  end subroutine send_BT_ext_and_recv_BT_int
1560
1561  subroutine convert_rowID_to_remote_localID(hecMESH, len, exp_rows_item)
1562    implicit none
1563    type (hecmwST_local_mesh), intent(in) :: hecMESH
1564    integer(kind=kint), intent(in) :: len
1565    integer(kind=kint), intent(out) :: exp_rows_item(:,:)
1566    integer(kind=kint) :: i
1567    do i = 1, len
1568      exp_rows_item(1,i) = hecMESH%node_ID(2 * exp_rows_item(1,i) - 1)
1569    enddo
1570  end subroutine convert_rowID_to_remote_localID
1571
1572  subroutine send_recv_BT_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index)
1573    use m_hecmw_comm_f
1574    implicit none
1575    type (hecmwST_local_mesh), intent(in) :: hecMESH
1576    type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1577    integer(kind=kint), allocatable, intent(out) :: imp_rows_index(:), imp_cols_index(:)
1578    integer(kind=kint) :: nnb, idom, irank, tag, recvbuf(2)
1579    integer(kind=kint), allocatable :: sendbuf(:,:)
1580    integer(kind=kint), allocatable :: requests(:)
1581    integer(kind=kint), allocatable :: statuses(:,:)
1582    nnb = hecMESH%n_neighbor_pe
1583    allocate(imp_rows_index(0:nnb))
1584    allocate(imp_cols_index(0:nnb))
1585    allocate(requests(nnb))
1586    allocate(statuses(HECMW_STATUS_SIZE, nnb))
1587    allocate(sendbuf(2,nnb))
1588    do idom = 1, nnb
1589      irank = hecMESH%neighbor_pe(idom)
1590      ! nr = exp_rows_per_rank(idom)
1591      sendbuf(1,idom) = BT_ext(idom)%nr
1592      sendbuf(2,idom) = BT_ext(idom)%nnz
1593      tag=2001
1594      call HECMW_ISEND_INT(sendbuf(1,idom), 2, irank, tag, hecMESH%MPI_COMM, &
1595           requests(idom))
1596    enddo
1597    imp_rows_index(0) = 0
1598    imp_cols_index(0) = 0
1599    do idom = 1, nnb
1600      irank = hecMESH%neighbor_pe(idom)
1601      tag = 2001
1602      call HECMW_RECV_INT(recvbuf, 2, irank, tag, &
1603           hecMESH%MPI_COMM, statuses(:,1))
1604      imp_rows_index(idom) = imp_rows_index(idom-1) + recvbuf(1)
1605      imp_cols_index(idom) = imp_cols_index(idom-1) + recvbuf(2)
1606    enddo
1607    call HECMW_Waitall(nnb, requests, statuses)
1608    deallocate(requests)
1609    deallocate(statuses)
1610    deallocate(sendbuf)
1611  end subroutine send_recv_BT_ext_nr_nnz
1612
1613  subroutine send_recv_BT_ext_contents(hecMESH, BT_ext, &
1614       exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1615       imp_rows_index, imp_cols_index, &
1616       imp_rows_item, imp_cols_item, imp_vals_item)
1617    use m_hecmw_comm_f
1618    implicit none
1619    type (hecmwST_local_mesh), intent(in) :: hecMESH
1620    type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1621    integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1622    integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1623    integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
1624    integer(kind=kint), allocatable, intent(out) :: imp_rows_item(:,:), imp_cols_item(:,:)
1625    real(kind=kreal), allocatable, intent(out) :: imp_vals_item(:)
1626    integer(kind=kint) :: nnb, ndof2, n_send, idom, irank, tag, nr, nnz
1627    integer(kind=kint), allocatable :: requests(:)
1628    integer(kind=kint), allocatable :: statuses(:,:)
1629    nnb = hecMESH%n_neighbor_pe
1630    if (nnb < 1) return
1631    ndof2 = BT_ext(1)%ndof ** 2
1632    allocate(imp_rows_item(2,imp_rows_index(nnb)))
1633    allocate(imp_cols_item(cNCOL_ITEM,imp_cols_index(nnb)))
1634    allocate(imp_vals_item(ndof2*imp_cols_index(nnb)))
1635    allocate(requests(3*nnb))
1636    allocate(statuses(HECMW_STATUS_SIZE, 3*nnb))
1637    n_send = 0
1638    do idom = 1, nnb
1639      irank = hecMESH%neighbor_pe(idom)
1640      if (BT_ext(idom)%nr > 0) then
1641        n_send = n_send + 1
1642        tag = 2002
1643        call HECMW_ISEND_INT(exp_rows_item(1,exp_rows_index(idom-1)+1), &
1644             2*BT_ext(idom)%nr, irank, tag, hecMESH%MPI_COMM, &
1645             requests(n_send))
1646        n_send = n_send + 1
1647        tag = 2003
1648        call HECMW_ISEND_INT(exp_cols_item(1,exp_cols_index(idom-1)+1), &
1649             cNCOL_ITEM*BT_ext(idom)%nnz, irank, tag, hecMESH%MPI_COMM, &
1650             requests(n_send))
1651        n_send = n_send + 1
1652        tag = 2004
1653        call HECMW_ISEND_R(BT_ext(idom)%A, ndof2*BT_ext(idom)%nnz, irank, &
1654             tag, hecMESH%MPI_COMM, requests(n_send))
1655      endif
1656    enddo
1657    do idom = 1, nnb
1658      irank = hecMESH%neighbor_pe(idom)
1659      nr = imp_rows_index(idom) - imp_rows_index(idom-1)
1660      nnz = imp_cols_index(idom) - imp_cols_index(idom-1)
1661      if (nr > 0) then
1662        tag = 2002
1663        call HECMW_RECV_INT(imp_rows_item(1,imp_rows_index(idom-1)+1), &
1664             2*nr, irank, tag, hecMESH%MPI_COMM, statuses(:,1))
1665        tag = 2003
1666        call HECMW_RECV_INT(imp_cols_item(1,imp_cols_index(idom-1)+1), &
1667             cNCOL_ITEM*nnz, irank, tag, hecMESH%MPI_COMM, statuses(:,1))
1668        tag = 2004
1669        call HECMW_RECV_R(imp_vals_item(ndof2*imp_cols_index(idom-1)+1), &
1670             ndof2*nnz, irank, tag, hecMESH%MPI_COMM, statuses(:,1))
1671      endif
1672    enddo
1673    call HECMW_Waitall(n_send, requests, statuses)
1674    deallocate(exp_rows_index)
1675    deallocate(exp_rows_item)
1676    deallocate(exp_cols_index)
1677    deallocate(exp_cols_item)
1678  end subroutine send_recv_BT_ext_contents
1679
1680  subroutine allocate_BT_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
1681    implicit none
1682    type (hecmwST_local_mesh), intent(in) :: hecMESH
1683    integer(kind=kint), intent(in) :: ndof
1684    integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_rows_item(:,:)
1685    type (hecmwST_local_matrix), intent(out) :: BT_int
1686    integer(kind=kint), allocatable :: cnt(:)
1687    integer(kind=kint) :: idom, is, ie, i, irow, ncol, ndof2
1688    ndof2 = ndof*ndof
1689    BT_int%nr = hecMESH%nn_internal
1690    BT_int%nc = hecMESH%n_node
1691    BT_int%nnz = 0
1692    BT_int%ndof = ndof
1693    allocate(cnt(BT_int%nr))
1694    cnt(:) = 0
1695    do idom = 1, hecMESH%n_neighbor_pe
1696      is = imp_rows_index(idom-1)+1
1697      ie = imp_rows_index(idom)
1698      do i = is, ie
1699        irow = imp_rows_item(1,i)
1700        ncol = imp_rows_item(2,i)
1701        if (irow < 1 .or. BT_int%nr < irow) stop 'ERROR: allocate BT_int'
1702        cnt(irow) = cnt(irow) + ncol   !!! might include duplicate cols
1703      enddo
1704    enddo
1705    !
1706    allocate(BT_int%index(0:BT_int%nr))
1707    call make_index(BT_int%nr, cnt, BT_int%index)
1708    !
1709    BT_int%nnz = BT_int%index(BT_int%nr)
1710    allocate(BT_int%item(BT_int%nnz))
1711    allocate(BT_int%A(BT_int%nnz * ndof2))
1712    BT_int%A(:) = 0.d0
1713  end subroutine allocate_BT_int
1714
1715  subroutine copy_mesh(src, dst)
1716    implicit none
1717    type (hecmwST_local_mesh), intent(in) :: src
1718    type (hecmwST_local_mesh), intent(out) :: dst
1719    dst%zero = src%zero
1720    dst%MPI_COMM = src%MPI_COMM
1721    dst%PETOT = src%PETOT
1722    dst%PEsmpTOT = src%PEsmpTOT
1723    dst%my_rank = src%my_rank
1724    dst%n_subdomain = src%n_subdomain
1725    dst%n_node = src%n_node
1726    dst%nn_internal = src%nn_internal
1727    dst%n_dof = src%n_dof
1728    dst%n_neighbor_pe = src%n_neighbor_pe
1729    allocate(dst%neighbor_pe(dst%n_neighbor_pe))
1730    dst%neighbor_pe(:) = src%neighbor_pe(:)
1731    allocate(dst%import_index(0:dst%n_neighbor_pe))
1732    allocate(dst%export_index(0:dst%n_neighbor_pe))
1733    dst%import_index(:)= src%import_index(:)
1734    dst%export_index(:)= src%export_index(:)
1735    allocate(dst%import_item(dst%import_index(dst%n_neighbor_pe)))
1736    dst%import_item(:) = src%import_item(:)
1737    allocate(dst%export_item(dst%export_index(dst%n_neighbor_pe)))
1738    dst%export_item(:) = src%export_item(:)
1739    allocate(dst%node_ID(2*dst%n_node))
1740    dst%node_ID(1:2*dst%n_node) = src%node_ID(1:2*src%n_node)
1741    allocate(dst%global_node_ID(dst%n_node))
1742    dst%global_node_ID(1:dst%n_node) = src%global_node_ID(1:src%n_node)
1743    dst%mpc%n_mpc = 0
1744    dst%node => src%node
1745  end subroutine copy_mesh
1746
1747  subroutine map_imported_cols(hecMESHnew, ncols, cols, n_add_node, add_nodes, map, i0)
1748    implicit none
1749    type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1750    integer(kind=kint), intent(in) :: ncols
1751    integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1752    integer(kind=kint), allocatable, intent(out) :: map(:)
1753    integer(kind=kint), intent(out) :: n_add_node
1754    integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1755    integer(kind=kint), intent(out) :: i0
1756    allocate(map(ncols))
1757    !
1758    call map_present_nodes(hecMESHnew, ncols, cols, map, n_add_node)
1759    !
1760    ! add nodes == unmapped nodes
1761    !
1762    call extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1763    !
1764    call append_nodes(hecMESHnew, n_add_node, add_nodes, i0)
1765    !
1766    call map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1767  end subroutine map_imported_cols
1768
1769  subroutine map_present_nodes(hecMESH, ncols, cols, map, n_add_node)
1770    implicit none
1771    type (hecmwST_local_mesh), intent(in) :: hecMESH
1772    integer(kind=kint), intent(in) :: ncols
1773    integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1774    integer(kind=kint), intent(out) :: map(ncols)
1775    integer(kind=kint), intent(out) :: n_add_node
1776    integer(kind=kint) :: i, j, lid, rank, llid, n_ext_node, idx
1777    integer(kind=kint), allocatable :: ext_node(:)
1778    type (hecmwST_pair_array) :: parray
1779    !
1780    call hecmw_pair_array_init(parray, hecMESH%n_node - hecMESH%nn_internal)
1781    do i = hecMESH%nn_internal + 1, hecMESH%n_node
1782      call hecmw_pair_array_append(parray, i, hecMESH%node_ID(2*i-1), hecMESH%node_ID(2*i))
1783    enddo
1784    call hecmw_pair_array_sort(parray)
1785    !
1786    n_add_node = 0
1787    n_ext_node = 0
1788    allocate(ext_node(ncols))
1789    !$omp parallel default(none), &
1790      !$omp& private(i,lid,rank,llid,idx,j), &
1791      !$omp& shared(ncols,hecMESH,cols,map,n_ext_node,ext_node,parray), &
1792      !$omp& reduction(+:n_add_node)
1793    !$omp do
1794    do i = 1, ncols
1795      lid = cols(cLID,i)
1796      rank = cols(cRANK,i)
1797      !   check rank
1798      if (rank == hecMESH%my_rank) then  !   internal: set mapping
1799        map(i) = lid
1800      else                               !   external
1801        !$omp atomic capture
1802        n_ext_node = n_ext_node + 1
1803        idx = n_ext_node
1804        !$omp end atomic
1805        ext_node(idx) = i
1806      endif
1807    enddo
1808    !$omp end do
1809    !$omp do
1810    do j = 1, n_ext_node
1811      i = ext_node(j)
1812      lid = cols(cLID,i)
1813      rank = cols(cRANK,i)
1814      !     search node_ID in external nodes
1815      llid = hecmw_pair_array_find_id(parray, lid, rank)
1816      if (llid > 0) then  !     found: set mapping
1817        map(i) = llid
1818      else                !     not found
1819        map(i) = -1
1820        n_add_node = n_add_node + 1
1821      endif
1822    enddo
1823    !$omp end do
1824    !$omp end parallel
1825    deallocate(ext_node)
1826    !
1827    call hecmw_pair_array_finalize(parray)
1828  end subroutine map_present_nodes
1829
1830  subroutine extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1831    implicit none
1832    integer(kind=kint), intent(in) :: ncols
1833    integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols), map(ncols)
1834    integer(kind=kint), intent(inout) :: n_add_node
1835    integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1836    integer(kind=kint) :: cnt, i
1837    allocate(add_nodes(cNCOL_ITEM,n_add_node))
1838    cnt = 0
1839    do i = 1, ncols
1840      if (map(i) == -1) then
1841        cnt = cnt + 1
1842        add_nodes(1:cNCOL_ITEM,cnt) = cols(1:cNCOL_ITEM,i)
1843      endif
1844    enddo
1845    if (cnt /= n_add_node) stop 'ERROR: extract add_nodes'
1846    call sort_and_uniq_add_nodes(n_add_node, add_nodes)
1847  end subroutine extract_add_nodes
1848
1849  subroutine sort_and_uniq_add_nodes(n_add_node, add_nodes)
1850    implicit none
1851    integer(kind=kint), intent(inout) :: n_add_node
1852    integer(kind=kint), intent(inout) :: add_nodes(cNCOL_ITEM,n_add_node)
1853    integer(kind=kint) :: ndup
1854    call sort_add_nodes(add_nodes, 1, n_add_node)
1855    call uniq_add_nodes(add_nodes, n_add_node, ndup)
1856    n_add_node = n_add_node - ndup
1857  end subroutine sort_and_uniq_add_nodes
1858
1859  recursive subroutine sort_add_nodes(add_nodes, id1, id2)
1860    implicit none
1861    integer(kind=kint), intent(inout) :: add_nodes(:,:)
1862    integer(kind=kint), intent(in) :: id1, id2
1863    integer(kind=kint) :: center, left, right
1864    integer(kind=kint) :: pivot(cNCOL_ITEM), tmp(cNCOL_ITEM)
1865    if (id1 >= id2) return
1866    center = (id1 + id2) / 2
1867    pivot(1:cNCOL_ITEM) = add_nodes(1:cNCOL_ITEM,center)
1868    left = id1
1869    right = id2
1870    do
1871      do while ((add_nodes(cRANK,left) < pivot(cRANK)) .or. &
1872           (add_nodes(cRANK,left) == pivot(cRANK) .and. add_nodes(cLID,left) < pivot(cLID)))
1873        left = left + 1
1874      enddo
1875      do while ((pivot(cRANK) < add_nodes(cRANK,right)) .or. &
1876           (pivot(cRANK) == add_nodes(cRANK,right) .and. pivot(cLID) < add_nodes(cLID,right)))
1877        right = right - 1
1878      enddo
1879      if (left >= right) exit
1880      tmp(1:cNCOL_ITEM) = add_nodes(1:cNCOL_ITEM,left)
1881      add_nodes(1:cNCOL_ITEM,left) = add_nodes(1:cNCOL_ITEM,right)
1882      add_nodes(1:cNCOL_ITEM,right) = tmp(1:cNCOL_ITEM)
1883      left = left + 1
1884      right = right - 1
1885    enddo
1886    if (id1 < left-1) call sort_add_nodes(add_nodes, id1, left-1)
1887    if (right+1 < id2) call sort_add_nodes(add_nodes, right+1, id2)
1888    return
1889  end subroutine sort_add_nodes
1890
1891  subroutine uniq_add_nodes(add_nodes, len, ndup)
1892    implicit none
1893    integer(kind=kint), intent(inout) :: add_nodes(:,:)
1894    integer(kind=kint), intent(in) :: len
1895    integer(kind=kint), intent(out) :: ndup
1896    integer(kind=kint) :: i
1897    ndup = 0
1898    do i = 2,len
1899      if (add_nodes(cLID,i) == add_nodes(cLID,i-1-ndup) .and. &
1900           add_nodes(cRANK,i) == add_nodes(cRANK,i-1-ndup)) then
1901        ndup = ndup + 1
1902      else if (ndup > 0) then
1903        add_nodes(1:cNCOL_ITEM,i-ndup) = add_nodes(1:cNCOL_ITEM,i)
1904      endif
1905    enddo
1906  end subroutine uniq_add_nodes
1907
1908  subroutine search_add_nodes(n_add_node, add_nodes, rank, lid, idx)
1909    implicit none
1910    integer(kind=kint), intent(in) :: n_add_node
1911    integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
1912    integer(kind=kint), intent(in) :: rank
1913    integer(kind=kint), intent(in) :: lid
1914    integer(kind=kint), intent(out) :: idx
1915    integer(kind=kint) :: left, right, center
1916    left = 1
1917    right = n_add_node
1918    do while (left <= right)
1919      center = (left + right) / 2
1920      if ((rank == add_nodes(cRANK,center)) .and. (lid == add_nodes(cLID,center))) then
1921        idx = center
1922        return
1923      else if ((rank < add_nodes(cRANK,center)) .or. &
1924           (rank == add_nodes(cRANK,center) .and. lid < add_nodes(cLID,center))) then
1925        right = center - 1
1926      else if ((add_nodes(cRANK,center) < rank) .or. &
1927           (add_nodes(cRANK,center) == rank .and. add_nodes(cLID,center) < lid)) then
1928        left = center + 1
1929      endif
1930    end do
1931    idx = -1
1932  end subroutine search_add_nodes
1933
1934  subroutine append_nodes(hecMESHnew, n_add_node, add_nodes, i0)
1935    implicit none
1936    type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1937    integer(kind=kint), intent(in) :: n_add_node
1938    integer(kind=kint), intent(in) :: add_nodes(:,:)
1939    integer(kind=kint), intent(out) :: i0
1940    integer(kind=kint) :: n_node, i, ii
1941    integer(kind=kint), pointer :: node_ID(:), global_node_ID(:)
1942    i0 = hecMESHnew%n_node
1943    n_node = hecMESHnew%n_node + n_add_node
1944    allocate(node_ID(2*n_node))
1945    allocate(global_node_ID(n_node))
1946    do i = 1, hecMESHnew%n_node
1947      node_ID(2*i-1) = hecMESHnew%node_ID(2*i-1)
1948      node_ID(2*i  ) = hecMESHnew%node_ID(2*i  )
1949      global_node_ID(i) = hecMESHnew%global_node_ID(i)
1950    enddo
1951    do i = 1, n_add_node
1952      ii = hecMESHnew%n_node + i
1953      node_ID(2*ii-1) = add_nodes(cLID,i)
1954      node_ID(2*ii  ) = add_nodes(cRANK,i)
1955      if (cNCOL_ITEM >= 3) then
1956        global_node_ID(i) = add_nodes(cGID,i)
1957      else
1958        global_node_ID(i) = -1
1959      endif
1960    enddo
1961    deallocate(hecMESHnew%node_ID)
1962    deallocate(hecMESHnew%global_node_ID)
1963    hecMESHnew%n_node = n_node
1964    hecMESHnew%node_ID => node_ID
1965    hecMESHnew%global_node_ID => global_node_ID
1966  end subroutine append_nodes
1967
1968  subroutine map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1969    implicit none
1970    integer(kind=kint), intent(in) :: ncols
1971    integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1972    integer(kind=kint), intent(in) :: n_add_node
1973    integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
1974    integer(kind=kint), intent(in) :: i0
1975    integer(kind=kint), intent(inout) :: map(ncols)
1976    integer(kind=kint) :: i, j
1977    do i = 1, ncols
1978      if (map(i) > 0) cycle
1979      call search_add_nodes(n_add_node, add_nodes, cols(cRANK,i), cols(cLID,i), j)
1980      if (j == -1) stop 'ERROR: map_additional_nodes'
1981      map(i) = i0 + j
1982    enddo
1983  end subroutine map_additional_nodes
1984
1985  subroutine update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
1986    use m_hecmw_comm_f
1987    implicit none
1988    type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1989    integer(kind=kint), intent(in) :: n_add_node
1990    integer(kind=kint), allocatable, intent(inout) :: add_nodes(:,:)
1991    integer(kind=kint), intent(in) :: i0
1992    integer(kind=kint), allocatable :: n_add_imp(:), add_imp_index(:)
1993    integer(kind=kint), allocatable :: add_imp_item_remote(:), add_imp_item_local(:)
1994    integer(kind=kint), allocatable :: n_add_exp(:), add_exp_index(:), add_exp_item(:)
1995    integer(kind=kint), allocatable :: n_new_imp(:), n_new_exp(:)
1996    integer(kind=kint) :: npe, nnb, comm, new_nnb
1997    integer(kind=kint), pointer :: nbpe(:), new_nbpe(:)
1998    integer(kind=kint), pointer :: import_index(:), export_index(:), import_item(:), export_item(:)
1999    integer(kind=kint), pointer :: new_import_index(:), new_export_index(:)
2000    integer(kind=kint), pointer :: new_import_item(:), new_export_item(:)
2001    npe = hecMESHnew%PETOT
2002    nnb = hecMESHnew%n_neighbor_pe
2003    comm = hecMESHnew%MPI_COMM
2004    nbpe => hecMESHnew%neighbor_pe
2005    import_index => hecMESHnew%import_index
2006    export_index => hecMESHnew%export_index
2007    import_item => hecMESHnew%import_item
2008    export_item => hecMESHnew%export_item
2009    !
2010    call count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2011    if (DEBUG >= 3) write(0,*) '    DEBUG3: count add_imp per rank done'
2012    !
2013    allocate(add_imp_index(0:npe))
2014    call make_index(npe, n_add_imp, add_imp_index)
2015    if (DEBUG >= 3) write(0,*) '    DEBUG3: make add_imp_index done'
2016    !
2017    call make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2018         add_imp_item_remote, add_imp_item_local)
2019    if (DEBUG >= 3) write(0,*) '    DEBUG3: make add_imp_item done'
2020    !
2021    deallocate(add_nodes)
2022    !
2023    ! all_to_all n_add_imp -> n_add_exp
2024    !
2025    allocate(n_add_exp(npe))
2026    call HECMW_ALLTOALL_INT(n_add_imp, 1, n_add_exp, 1, comm)
2027    if (DEBUG >= 3) write(0,*) '    DEBUG3: alltoall n_add_imp to n_add_exp done'
2028    !
2029    allocate(add_exp_index(0:npe))
2030    call make_index(npe, n_add_exp, add_exp_index)
2031    if (DEBUG >= 3) write(0,*) '    DEBUG3: make add_exp_index done'
2032    !
2033    call send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2034         add_exp_index, add_exp_item, comm)
2035    if (DEBUG >= 3) write(0,*) '    DEBUG3: send recv add_imp/exp_item done'
2036    !
2037    ! count new import
2038    !
2039    call count_new_comm_nodes(npe, nnb, nbpe, import_index, n_add_imp, n_new_imp)
2040    if (DEBUG >= 3) write(0,*) '    DEBUG3: count new comm_nodes (import) done'
2041    !
2042    ! count new export
2043    !
2044    call count_new_comm_nodes(npe, nnb, nbpe, export_index, n_add_exp, n_new_exp)
2045    if (DEBUG >= 3) write(0,*) '    DEBUG3: count new comm_nodes (export) done'
2046    !
2047    call update_neighbor_pe(npe, n_new_imp, n_new_exp, new_nnb, new_nbpe)
2048    if (DEBUG >= 3) write(0,*) '    DEBUG3: update neighbor_pe done'
2049    !
2050    ! merge import table: import
2051    !
2052    call merge_comm_table(npe, nnb, nbpe, import_index, import_item, &
2053         new_nnb, new_nbpe, add_imp_index, add_imp_item_local, n_add_imp, n_new_imp, &
2054         new_import_index, new_import_item)
2055    if (DEBUG >= 3) write(0,*) '    DEBUG3: merge comm_table (import) done'
2056    !
2057    deallocate(n_add_imp)
2058    deallocate(add_imp_index)
2059    deallocate(add_imp_item_remote, add_imp_item_local)
2060    deallocate(n_new_imp)
2061    !
2062    ! merge export table: export
2063    !
2064    call merge_comm_table(npe, nnb, nbpe, export_index, export_item, &
2065         new_nnb, new_nbpe, add_exp_index, add_exp_item, n_add_exp, n_new_exp, &
2066         new_export_index, new_export_item)
2067    if (DEBUG >= 3) write(0,*) '    DEBUG3: merge comm_table (export) done'
2068    !
2069    deallocate(n_add_exp)
2070    deallocate(add_exp_index)
2071    deallocate(add_exp_item)
2072    deallocate(n_new_exp)
2073    !
2074    deallocate(nbpe)
2075    deallocate(import_index,import_item)
2076    deallocate(export_index,export_item)
2077    hecMESHnew%n_neighbor_pe = new_nnb
2078    hecMESHnew%neighbor_pe => new_nbpe
2079    hecMESHnew%import_index => new_import_index
2080    hecMESHnew%export_index => new_export_index
2081    hecMESHnew%import_item => new_import_item
2082    hecMESHnew%export_item => new_export_item
2083  end subroutine update_comm_table
2084
2085  subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2086    implicit none
2087    integer(kind=kint), intent(in) :: n_add_node
2088    integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
2089    integer(kind=kint), intent(in) :: npe
2090    integer(kind=kint), allocatable, intent(out) :: n_add_imp(:)
2091    integer(kind=kint) :: i, rank
2092    allocate(n_add_imp(npe))
2093    n_add_imp(:) = 0
2094    do i = 1, n_add_node
2095      rank = add_nodes(cRANK,i)
2096      n_add_imp(rank+1) = n_add_imp(rank+1) + 1
2097    enddo
2098  end subroutine count_add_imp_per_rank
2099
2100  subroutine make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2101       add_imp_item_remote, add_imp_item_local)
2102    implicit none
2103    integer(kind=kint), intent(in) :: n_add_node
2104    integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
2105    integer(kind=kint), intent(in) :: npe, i0
2106    integer(kind=kint), allocatable, intent(in) :: add_imp_index(:)
2107    integer(kind=kint), allocatable, intent(out) :: add_imp_item_remote(:), add_imp_item_local(:)
2108    integer(kind=kint), allocatable :: cnt(:)
2109    integer(kind=kint) :: i, lid, rank, ipe
2110    allocate(add_imp_item_remote(add_imp_index(npe)))
2111    allocate(add_imp_item_local(add_imp_index(npe)))
2112    allocate(cnt(npe))
2113    cnt(:) = 0
2114    do i = 1, n_add_node
2115      lid = add_nodes(cLID,i)
2116      rank = add_nodes(cRANK,i)
2117      ipe = rank + 1
2118      cnt(ipe) = cnt(ipe) + 1
2119      add_imp_item_remote(add_imp_index(ipe-1) + cnt(ipe)) = lid
2120      add_imp_item_local(add_imp_index(ipe-1) + cnt(ipe)) = i0 + i
2121    enddo
2122    deallocate(cnt)
2123  end subroutine make_add_imp_item
2124
2125  subroutine send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2126       add_exp_index, add_exp_item, mpi_comm)
2127    use m_hecmw_comm_f
2128    implicit none
2129    integer(kind=kint), intent(in) :: npe
2130    integer(kind=kint), allocatable, intent(in) :: add_imp_index(:), add_imp_item_remote(:)
2131    integer(kind=kint), allocatable, intent(in) :: add_exp_index(:)
2132    integer(kind=kint), allocatable, intent(out) :: add_exp_item(:)
2133    integer(kind=kint), intent(in) :: mpi_comm
2134    integer(kind=kint) :: n_send, i, irank, is, ie, len, tag
2135    integer(kind=kint), allocatable :: requests(:)
2136    integer(kind=kint), allocatable :: statuses(:,:)
2137    allocate(add_exp_item(add_exp_index(npe)))
2138    allocate(requests(npe))
2139    allocate(statuses(HECMW_STATUS_SIZE, npe))
2140    n_send = 0
2141    do i = 1, npe
2142      irank = i-1
2143      is = add_imp_index(i-1)+1
2144      ie = add_imp_index(i)
2145      len = ie - is + 1
2146      if (len == 0) cycle
2147      tag = 4001
2148      n_send = n_send + 1
2149      call HECMW_ISEND_INT(add_imp_item_remote(is:ie), len, irank, tag, &
2150           mpi_comm, requests(n_send))
2151    enddo
2152    !
2153    do i = 1, npe
2154      irank = i-1
2155      is = add_exp_index(i-1)+1
2156      ie = add_exp_index(i)
2157      len = ie - is + 1
2158      if (len == 0) cycle
2159      tag = 4001
2160      call HECMW_RECV_INT(add_exp_item(is:ie), len, irank, tag, &
2161           mpi_comm, statuses(:,1))
2162    enddo
2163    call HECMW_Waitall(n_send, requests, statuses)
2164  end subroutine send_recv_add_imp_exp_item
2165
2166  subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new)
2167    implicit none
2168    integer(kind=kint), intent(in) :: npe, org_nnb
2169    !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), n_add(npe)
2170    integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:)
2171    integer(kind=kint), intent(in) ::  n_add(:)
2172    integer(kind=kint), allocatable, intent(out) :: n_new(:)
2173    integer(kind=kint) :: i, irank, n_org
2174    allocate(n_new(npe))
2175    n_new(:) = n_add(:)
2176    do i = 1, org_nnb
2177      irank = org_nbpe(i)
2178      n_org = org_index(i) - org_index(i-1)
2179      n_new(irank+1) = n_new(irank+1) + n_org
2180    enddo
2181  end subroutine count_new_comm_nodes
2182
2183  subroutine update_neighbor_pe(npe, n_new_imp, n_new_exp, &
2184       new_nnb, new_nbpe)
2185    implicit none
2186    integer(kind=kint), intent(in) :: npe
2187    integer(kind=kint), intent(in) :: n_new_imp(npe), n_new_exp(npe)
2188    integer(kind=kint), intent(out) :: new_nnb
2189    integer(kind=kint), pointer, intent(out) :: new_nbpe(:)
2190    integer(kind=kint) :: i
2191    new_nnb = 0
2192    do i = 1, npe
2193      if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) new_nnb = new_nnb+1
2194    enddo
2195    allocate(new_nbpe(new_nnb))
2196    new_nnb = 0
2197    do i = 1, npe
2198      if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) then
2199        new_nnb = new_nnb+1
2200        new_nbpe(new_nnb) = i-1
2201      endif
2202    enddo
2203  end subroutine update_neighbor_pe
2204
2205  subroutine merge_comm_table(npe, org_nnb, org_nbpe, org_index, org_item, &
2206       new_nnb, new_nbpe, add_index, add_item, n_add, n_new, new_index, new_item)
2207    implicit none
2208    integer(kind=kint), intent(in) :: npe, org_nnb
2209    !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), org_item(:)
2210    integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:), org_item(:)
2211    integer(kind=kint), intent(in) :: new_nnb
2212    !integer(kind=kint), intent(in) :: new_nbpe(new_nnb), add_index(0:npe), add_item(:)
2213    integer(kind=kint), pointer, intent(in) :: new_nbpe(:)
2214    integer(kind=kint), allocatable, intent(in) :: add_index(:), add_item(:)
2215    integer(kind=kint), intent(in) :: n_add(npe), n_new(npe)
2216    integer(kind=kint), pointer, intent(out) :: new_index(:), new_item(:)
2217    integer(kind=kint), allocatable :: cnt(:)
2218    integer(kind=kint) :: i, irank, j, jrank, i0, j0, len
2219    ! if (associated(new_index)) deallocate(new_index)
2220    ! if (associated(new_item)) deallocate(new_item)
2221    allocate(new_index(0:new_nnb))
2222    new_index(0) = 0
2223    do i = 1, new_nnb
2224      irank = new_nbpe(i)
2225      new_index(i) = new_index(i-1) + n_new(irank+1)
2226    enddo
2227    allocate(new_item(new_index(new_nnb)))
2228    allocate(cnt(npe))
2229    cnt(:) = 0
2230    j = 1
2231    jrank = new_nbpe(j)
2232    do i = 1, org_nnb
2233      if (org_index(i) - org_index(i-1) == 0) cycle
2234      irank = org_nbpe(i)
2235      do while (jrank < irank)
2236        j = j + 1
2237        if (j > new_nnb) exit
2238        jrank = new_nbpe(j)
2239      enddo
2240      if (jrank /= irank) stop 'ERROR: merging comm table: org into new'
2241      i0 = org_index(i-1)
2242      len = org_index(i) - i0
2243      j0 = new_index(j-1)
2244      new_item(j0+1:j0+len) = org_item(i0+1:i0+len)
2245      cnt(jrank+1) = len
2246    enddo
2247    j = 1
2248    jrank = new_nbpe(j)
2249    do i = 1, npe
2250      if (n_add(i) == 0) cycle
2251      irank = i-1
2252      do while (jrank < irank)
2253        j = j + 1
2254        jrank = new_nbpe(j)
2255      enddo
2256      if (jrank /= irank) stop 'ERROR: merging comm table: add into new'
2257      i0 = add_index(i-1)
2258      len = add_index(i) - i0
2259      j0 = new_index(j-1) + cnt(jrank+1)
2260      new_item(j0+1:j0+len) = add_item(i0+1:i0+len)
2261      cnt(jrank+1) = cnt(jrank+1) + len
2262      if (cnt(jrank+1) /= new_index(j)-new_index(j-1)) stop 'ERROR: merging comm table'
2263    enddo
2264    deallocate(cnt)
2265  end subroutine merge_comm_table
2266
2267  subroutine copy_vals_to_BT_int(nnb, imp_rows_index, imp_cols_index, &
2268       imp_rows_item, map, ndof2, imp_vals_item, BT_int)
2269    implicit none
2270    integer(kind=kint), intent(in) :: nnb
2271    integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
2272    integer(kind=kint), intent(in) :: imp_rows_item(:,:), map(:)
2273    integer(kind=kint), intent(in) :: ndof2
2274    real(kind=kreal), intent(in) :: imp_vals_item(:)
2275    type (hecmwST_local_matrix), intent(inout) :: BT_int
2276    integer(kind=kint), allocatable :: cnt(:)
2277    integer(kind=kint) :: idom, is, ie, ic0, i, irow, ncol, j0, j
2278    allocate(cnt(BT_int%nr))
2279    cnt(:) = 0
2280    do idom = 1, nnb
2281      is = imp_rows_index(idom-1)+1
2282      ie = imp_rows_index(idom)
2283      ic0 = imp_cols_index(idom-1)
2284      do i = is, ie
2285        irow = imp_rows_item(1,i)
2286        ncol = imp_rows_item(2,i)
2287        if (irow < 1 .or. BT_int%nr < irow) stop 'ERROR: copy vals to BT_int: irow'
2288        j0 = BT_int%index(irow-1) + cnt(irow)
2289        do j = 1, ncol
2290          BT_int%item(j0+j) = map(ic0+j)
2291          BT_int%A(ndof2*(j0+j-1)+1:ndof2*(j0+j)) = imp_vals_item(ndof2*(ic0+j-1)+1:ndof2*(ic0+j))
2292        enddo
2293        cnt(irow) = cnt(irow) + ncol
2294        ic0 = ic0 + ncol
2295      enddo
2296      if (ic0 /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_int: ic0'
2297    enddo
2298    deallocate(cnt)
2299  end subroutine copy_vals_to_BT_int
2300
2301  subroutine sort_and_uniq_rows(BTmat)
2302    use hecmw_array_util
2303    implicit none
2304    type (hecmwST_local_matrix), intent(inout) :: BTmat
2305    integer(kind=kint) :: nr, ndof, ndof2
2306    integer(kind=kint) :: irow, is, ie, is_new, ie_new, i, i_new
2307    integer(kind=kint) :: ndup, ndup_tot
2308    integer(kind=kint) :: js, je, js_new, je_new
2309    integer(kind=kint) :: new_nnz
2310    integer(kind=kint), allocatable :: cnt(:)
2311    integer(kind=kint), pointer :: sort_item(:), new_index(:), new_item(:)
2312    real(kind=kreal), pointer :: new_A(:)
2313    logical :: sorted
2314    real(kind=kreal) :: t0, t1
2315    t0 = hecmw_wtime()
2316    nr = BTmat%nr
2317    ! check if already sorted
2318    sorted = .true.
2319    OUTER: do irow = 1, nr
2320      is = BTmat%index(irow-1)+1
2321      ie = BTmat%index(irow)
2322      do i = is, ie-1
2323        if (BTmat%item(i) >= BTmat%item(i+1)) then
2324          sorted = .false.
2325          exit OUTER
2326        endif
2327      enddo
2328    end do OUTER
2329    t1 = hecmw_wtime()
2330    if (TIMER >= 4) write(0, '(A,f10.4,L2)') "####### sort_and_uniq_rows (1) : ",t1-t0,sorted
2331    t0 = hecmw_wtime()
2332    if (sorted) return
2333    ! perform sort
2334    ndof = BTmat%ndof
2335    ndof2 = ndof*ndof
2336    ! duplicate item array (sort_item)
2337    allocate(sort_item(BTmat%nnz))
2338    do i = 1, BTmat%nnz
2339      sort_item(i) = BTmat%item(i)
2340    enddo
2341    ! sort and uniq item for each row
2342    allocate(cnt(nr))
2343    ndup_tot = 0
2344    !$omp parallel do default(none), &
2345      !$omp& schedule(dynamic,1), &
2346      !$omp& private(irow,is,ie,ndup), &
2347      !$omp& shared(nr,BTmat,sort_item,cnt), &
2348      !$omp& reduction(+:ndup_tot)
2349    do irow = 1, nr
2350      is = BTmat%index(irow-1)+1
2351      ie = BTmat%index(irow)
2352      call hecmw_qsort_int_array(sort_item, is, ie)
2353      call hecmw_uniq_int_array(sort_item, is, ie, ndup)
2354      cnt(irow) = (ie-is+1) - ndup
2355      ndup_tot = ndup_tot + ndup
2356    enddo
2357    !$omp end parallel do
2358    t1 = hecmw_wtime()
2359    if (TIMER >= 4) write(0, '(A,f10.4,I5)') "####### sort_and_uniq_rows (2) : ",t1-t0,ndup_tot
2360    t0 = hecmw_wtime()
2361    ! make new index and item array (new_index, new_item)
2362    if (ndup_tot == 0) then
2363      new_index => BTmat%index
2364      new_nnz = BTmat%nnz
2365      new_item => sort_item
2366    else
2367      allocate(new_index(0:nr))
2368      call make_index(nr, cnt, new_index)
2369      new_nnz = new_index(nr)
2370      allocate(new_item(new_nnz))
2371      do irow = 1, nr
2372        is = BTmat%index(irow-1)+1
2373        ie = is+cnt(irow)-1
2374        is_new = new_index(irow-1)+1
2375        ie_new = is_new+cnt(irow)-1
2376        new_item(is_new:ie_new) = sort_item(is:ie)
2377      enddo
2378      deallocate(sort_item)
2379    endif
2380    deallocate(cnt)
2381    t1 = hecmw_wtime()
2382    if (TIMER >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (3) : ",t1-t0
2383    t0 = hecmw_wtime()
2384    ! allocate and clear value array (new_A)
2385    allocate(new_A(ndof2*new_nnz))
2386    new_A(:) = 0.d0
2387    ! copy/add value from old A to new A
2388    !$omp parallel do default(none), &
2389      !$omp& schedule(dynamic,1), &
2390      !$omp& private(irow,is,ie,is_new,ie_new,i,i_new,js,je,js_new,je_new), &
2391      !$omp& shared(nr,BTmat,new_index,new_item,ndof2,new_A)
2392    do irow = 1, nr
2393      is = BTmat%index(irow-1)+1
2394      ie = BTmat%index(irow)
2395      is_new = new_index(irow-1)+1
2396      ie_new = new_index(irow)
2397      ! for each item in row
2398      do i = is, ie
2399        ! find place in new item
2400        call hecmw_bsearch_int_array(new_item, is_new, ie_new, BTmat%item(i), i_new)
2401        if (i_new == -1) stop 'ERROR: sort_and_uniq_rows'
2402        js = ndof2*(i-1)+1
2403        je = ndof2*i
2404        js_new = ndof2*(i_new-1)+1
2405        je_new = ndof2*i_new
2406        new_A(js_new:je_new) = new_A(js_new:je_new) + BTmat%A(js:je)
2407      enddo
2408    enddo
2409    !$omp end parallel do
2410    t1 = hecmw_wtime()
2411    if (TIMER >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (4) : ",t1-t0
2412    t0 = hecmw_wtime()
2413    ! deallocate/update nnz, index, item, A
2414    if (ndup_tot == 0) then
2415      deallocate(BTmat%item)
2416      BTmat%item => new_item
2417      deallocate(BTmat%A)
2418      BTmat%A => new_A
2419    else
2420      BTmat%nnz = new_nnz
2421      deallocate(BTmat%index)
2422      BTmat%index => new_index
2423      deallocate(BTmat%item)
2424      BTmat%item => new_item
2425      deallocate(BTmat%A)
2426      BTmat%A => new_A
2427    endif
2428  end subroutine sort_and_uniq_rows
2429
2430  subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2431    implicit none
2432    type (hecmwST_local_matrix), intent(in) :: Amat
2433    type (hecmwST_local_matrix), intent(in) :: Bmat
2434    type (hecmwST_local_matrix), intent(out) :: Cmat
2435    integer(kind=kint) :: ndof, ndof2, nr, nc, i, icnt, js, je, j, jcol, idx, i0, k
2436    integer(kind=kint), allocatable :: iw(:)
2437    if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2438    ndof = Amat%ndof
2439    ndof2 = ndof*ndof
2440    nr = min(Amat%nr, Bmat%nr)
2441    nc = max(Amat%nc, Bmat%nc)
2442    Cmat%ndof = ndof
2443    Cmat%nr = nr
2444    Cmat%nc = nc
2445    Cmat%nnz = 0
2446    allocate(Cmat%index(0:nr))
2447    Cmat%index(0) = 0
2448    allocate(iw(nc))
2449    do i = 1, nr
2450      icnt = 0
2451      ! Amat
2452      js = Amat%index(i-1)+1
2453      je = Amat%index(i)
2454      do j = js, je
2455        jcol = Amat%item(j)
2456        icnt = icnt + 1
2457        iw(icnt) = jcol
2458      enddo
2459      ! Bmat
2460      js = Bmat%index(i-1)+1
2461      je = Bmat%index(i)
2462      lj1: do j = js, je
2463        jcol = Bmat%item(j)
2464        do k = 1, icnt
2465          if (iw(k) == jcol) cycle lj1
2466        enddo
2467        icnt = icnt + 1
2468        iw(icnt) = jcol
2469      enddo lj1
2470      Cmat%index(i) = Cmat%index(i-1) + icnt
2471    enddo
2472    Cmat%nnz = Cmat%index(nr)
2473    allocate(Cmat%item(Cmat%nnz))
2474    allocate(Cmat%A(ndof2*Cmat%nnz))
2475    do i = 1, nr
2476      i0 = Cmat%index(i-1)
2477      icnt = 0
2478      ! Amat
2479      js = Amat%index(i-1)+1
2480      je = Amat%index(i)
2481      do j = js, je
2482        jcol = Amat%item(j)
2483        icnt = icnt + 1
2484        idx = i0 + icnt
2485        Cmat%item(idx) = jcol
2486        Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j)
2487      enddo
2488      ! Bmat
2489      js = Bmat%index(i-1)+1
2490      je = Bmat%index(i)
2491      lj2: do j = js, je
2492        jcol = Bmat%item(j)
2493        do k = 1, icnt
2494          idx = i0 + k
2495          if (Cmat%item(idx) == jcol) then
2496            Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = &
2497                 Cmat%A(ndof2*(idx-1)+1:ndof2*idx) + Bmat%A(ndof2*(j-1)+1:ndof2*j)
2498            cycle lj2
2499          endif
2500        enddo
2501        icnt = icnt + 1
2502        idx = i0 + icnt
2503        Cmat%item(idx) = jcol
2504        Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j)
2505      enddo lj2
2506      if (i0 + icnt /= Cmat%index(i)) stop 'ERROR: merge localmat'
2507    enddo
2508    call sort_and_uniq_rows(Cmat)
2509  end subroutine hecmw_localmat_add
2510
2511  ! subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2512  !   implicit none
2513  !   type (hecmwST_local_matrix), intent(in) :: Amat
2514  !   type (hecmwST_local_matrix), intent(in) :: Bmat
2515  !   type (hecmwST_local_matrix), intent(out) :: Cmat
2516  !   integer(kind=kint) :: ndof, ndof2, nr, nc, i, js, je, j, jcol, nnz_row, idx, ks, ke, k, kcol
2517  !   if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2518  !   ndof = Amat%ndof
2519  !   ndof2 = ndof*ndof
2520  !   nr = min(Amat%nr, Bmat%nr)
2521  !   nc = max(Amat%nc, Bmat%nc)
2522  !   Cmat%ndof = ndof
2523  !   Cmat%nr = nr
2524  !   Cmat%nc = nc
2525  !   Cmat%nnz = Amat%index(nr) + Bmat%index(nr)
2526  !   allocate(Cmat%index(0:nr))
2527  !   allocate(Cmat%item(Cmat%nnz))
2528  !   allocate(Cmat%A(ndof2 * Cmat%nnz))
2529  !   Cmat%index(0) = 0
2530  !   idx = 0
2531  !   do i = 1, nr
2532  !     ! Amat
2533  !     js = Amat%index(i-1)+1
2534  !     je = Amat%index(i)
2535  !     do j = js, je
2536  !       idx = idx + 1
2537  !       Cmat%item(idx) = Amat%item(j)
2538  !       Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j)
2539  !     enddo
2540  !     ! Bmat
2541  !     js = Bmat%index(i-1)+1
2542  !     je = Bmat%index(i)
2543  !     do j = js, je
2544  !       idx = idx + 1
2545  !       Cmat%item(idx) = Bmat%item(j)
2546  !       Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j)
2547  !     enddo
2548  !     Cmat%index(i) = idx
2549  !   enddo
2550  !   if (Cmat%index(nr) /= Cmat%nnz) stop 'ERROR: merge localmat'
2551  !   call sort_and_uniq_rows(Cmat)
2552  ! end subroutine hecmw_localmat_add
2553
2554  subroutine hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange)
2555    implicit none
2556    type (hecmwST_local_matrix), intent(inout) :: BKmat
2557    type (hecmwST_matrix), intent(in) :: hecMAT
2558    integer(kind=kint), optional, intent(in) :: num_lagrange
2559    integer(kind=kint) :: ndof, ndof2, i, idx, idx2, js, je, j, k
2560    integer(kind=kint), allocatable :: incl_nz(:), cnt(:)
2561    logical :: check_nonzero
2562    !check_nonzero = .false.
2563    check_nonzero = .true.  !!! always checking nonzero seems to be faster
2564    !
2565    ndof = hecMAT%NDOF
2566    ndof2 = ndof*ndof
2567    ! nr, nc, nnz
2568    BKmat%nr = hecMAT%NP
2569    BKmat%nc = hecMAT%NP
2570    BKmat%ndof = ndof
2571    !
2572    if (present(num_lagrange)) then       !!! TEMPORARY (DUE TO WRONG conMAT WHEN num_lagrange==0) !!!
2573      if (num_lagrange == 0) then
2574        BKmat%nnz = 0
2575        allocate(BKmat%index(0:BKmat%nr))
2576        BKmat%index(:) = 0
2577        BKmat%item => null()
2578        BKmat%A => null()
2579        return
2580      endif
2581      check_nonzero = .true.
2582    endif
2583    !
2584    if (check_nonzero) then
2585      allocate(incl_nz(hecMAT%NPL + hecMAT%NPU + hecMAT%NP))
2586      allocate(cnt(BKmat%nr))
2587      incl_nz(:) = 0
2588      !$omp parallel default(none), &
2589        !$omp& private(i,idx,js,je,j,k), &
2590        !$omp& shared(BKmat,hecMAT,cnt,ndof2,incl_nz)
2591      !$omp do
2592      do i = 1, BKmat%nr
2593        idx = hecMAT%indexL(i-1) + (i-1) + hecMAT%indexU(i-1)
2594        cnt(i) = 0
2595        ! lower
2596        js = hecMAT%indexL(i-1)+1
2597        je = hecMAT%indexL(i)
2598        do j = js, je
2599          idx = idx + 1
2600          do k = 1, ndof2
2601            if (hecMAT%AL(ndof2*(j-1)+k) /= 0.0d0) then
2602              incl_nz(idx) = 1
2603              cnt(i) = cnt(i) + 1
2604              exit
2605            endif
2606          enddo
2607        enddo
2608        ! diag
2609        idx = idx + 1
2610        do k = 1, ndof2
2611          if (hecMAT%D(ndof2*(i-1)+k) /= 0.0d0) then
2612            incl_nz(idx) = 1
2613            cnt(i) = cnt(i) + 1
2614            exit
2615          endif
2616        enddo
2617        ! upper
2618        js = hecMAT%indexU(i-1)+1
2619        je = hecMAT%indexU(i)
2620        do j = js, je
2621          idx = idx + 1
2622          do k = 1, ndof2
2623            if (hecMAT%AU(ndof2*(j-1)+k) /= 0.0d0) then
2624              incl_nz(idx) = 1
2625              cnt(i) = cnt(i) + 1
2626              exit
2627            endif
2628          enddo
2629        enddo
2630        if (idx /= hecMAT%indexL(i) + i + hecMAT%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: count'
2631      enddo
2632      !$omp end do
2633      !$omp end parallel
2634      ! index
2635      allocate(BKmat%index(0:BKmat%nr))
2636      call make_index(BKmat%nr, cnt, BKmat%index)
2637      deallocate(cnt)
2638      BKmat%nnz = BKmat%index(BKmat%nr)
2639      ! item, A
2640      allocate(BKmat%item(BKmat%nnz))
2641      allocate(BKmat%A(ndof2 * BKmat%nnz))
2642      !$omp parallel default(none), &
2643        !$omp& private(i,idx,idx2,js,je,j), &
2644        !$omp& shared(BKmat,hecMAT,ndof2,incl_nz)
2645      !$omp do
2646      do i = 1, BKmat%nr
2647        idx = hecMAT%indexL(i-1) + (i-1) + hecMAT%indexU(i-1)
2648        idx2 = BKmat%index(i-1)
2649        ! lower
2650        js = hecMAT%indexL(i-1)+1
2651        je = hecMAT%indexL(i)
2652        do j = js, je
2653          idx = idx + 1
2654          if (incl_nz(idx) == 1) then
2655            idx2 = idx2 + 1
2656            BKmat%item(idx2) = hecMAT%itemL(j)
2657            BKmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecMAT%AL(ndof2*(j-1)+1:ndof2*j)
2658          endif
2659        enddo
2660        ! diag
2661        idx = idx + 1
2662        if (incl_nz(idx) == 1) then
2663          idx2 = idx2 + 1
2664          BKmat%item(idx2) = i
2665          BKmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecMAT%D(ndof2*(i-1)+1:ndof2*i)
2666        endif
2667        ! upper
2668        js = hecMAT%indexU(i-1)+1
2669        je = hecMAT%indexU(i)
2670        do j = js, je
2671          idx = idx + 1
2672          if (incl_nz(idx) == 1) then
2673            idx2 = idx2 + 1
2674            BKmat%item(idx2) = hecMAT%itemU(j)
2675            BKmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecMAT%AU(ndof2*(j-1)+1:ndof2*j)
2676          endif
2677        enddo
2678        if (idx /= hecMAT%indexL(i) + i + hecMAT%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2679        if (idx2 /= BKmat%index(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: index'
2680      enddo
2681      !$omp end do
2682      !$omp end parallel
2683      deallocate(incl_nz)
2684    else
2685      BKmat%nnz = hecMAT%NPL + hecMAT%NP + hecMAT%NPU
2686      allocate(BKmat%index(0:BKmat%nr))
2687      allocate(BKmat%item(BKmat%nnz))
2688      allocate(BKmat%A(ndof2 * BKmat%nnz))
2689      BKmat%index(0) = 0
2690      !$omp parallel do default(none), &
2691        !$omp& private(i,idx,js,je,j), &
2692        !$omp& shared(BKmat,hecMAT,ndof2)
2693      do i = 1, BKmat%nr
2694        idx = hecMAT%indexL(i-1) + (i-1) + hecMAT%indexU(i-1)
2695        ! lower
2696        js = hecMAT%indexL(i-1)+1
2697        je = hecMAT%indexL(i)
2698        do j = js, je
2699          idx = idx + 1
2700          BKmat%item(idx) = hecMAT%itemL(j)
2701          BKmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecMAT%AL(ndof2*(j-1)+1:ndof2*j)
2702        enddo
2703        ! diag
2704        idx = idx + 1
2705        BKmat%item(idx) = i
2706        BKmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecMAT%D(ndof2*(i-1)+1:ndof2*i)
2707        ! upper
2708        js = hecMAT%indexU(i-1)+1
2709        je = hecMAT%indexU(i)
2710        do j = js, je
2711          idx = idx + 1
2712          BKmat%item(idx) = hecMAT%itemU(j)
2713          BKmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecMAT%AU(ndof2*(j-1)+1:ndof2*j)
2714        enddo
2715        BKmat%index(i) = idx
2716        if (idx /= hecMAT%indexL(i) + i + hecMAT%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2717      enddo
2718      !$omp end parallel do
2719    endif
2720  end subroutine hecmw_localmat_init_with_hecmat
2721
2722  subroutine hecmw_localmat_add_hecmat(BKmat, hecMAT)
2723    implicit none
2724    type (hecmwST_local_matrix), intent(inout) :: BKmat
2725    type (hecmwST_matrix), intent(in) :: hecMAT
2726    type (hecmwST_local_matrix) :: W1mat, W2mat
2727    !! Should Be Simple If Non-Zero Profile Is Kept !!
2728    call hecmw_localmat_init_with_hecmat(W1mat, hecMAT)
2729    if (DEBUG >= 3) then
2730      write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)'
2731      if (DEBUG == 3) then
2732        call hecmw_localmat_write_ij(W1mat, 700+hecmw_comm_get_rank())
2733      else
2734        call hecmw_localmat_write(W1mat, 700+hecmw_comm_get_rank())
2735      endif
2736    endif
2737    call hecmw_localmat_add(BKmat, W1mat, W2mat)
2738    call hecmw_localmat_free(BKmat)
2739    call hecmw_localmat_free(W1mat)
2740    BKmat%nr = W2mat%nr
2741    BKmat%nc = W2mat%nc
2742    BKmat%nnz = W2mat%nnz
2743    BKmat%ndof = W2mat%ndof
2744    BKmat%index => W2mat%index
2745    BKmat%item => W2mat%item
2746    BKmat%A => W2mat%A
2747  end subroutine hecmw_localmat_add_hecmat
2748
2749  subroutine hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat)
2750    implicit none
2751    type (hecmwST_local_matrix), intent(inout) :: BKmat
2752    type (hecmwST_local_matrix), intent(inout) :: BTmat
2753    type (hecmwST_local_mesh), intent(inout) :: hecMESH
2754    type (hecmwST_local_matrix), intent(out) :: BKTmat
2755    type (hecmwST_matrix_comm) :: hecCOMM
2756    type (hecmwST_local_mesh) :: hecMESHnew
2757    type (hecmwST_local_matrix), allocatable :: BT_exp(:)
2758    type (hecmwST_local_matrix) :: BT_imp, BT_all
2759    integer(kind=kint), allocatable :: exp_cols_index(:)
2760    integer(kind=kint), allocatable :: exp_cols_item(:,:)
2761    real(kind=kreal) :: t0, t1
2762    t0 = hecmw_wtime()
2763    !
2764    call make_comm_table(BKmat, hecMESH, hecCOMM)
2765    if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: make_comm_table done'
2766    t1 = hecmw_wtime()
2767    if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (1) : ',t1-t0
2768    t0 = hecmw_wtime()
2769    !
2770    if (BTmat%nr > hecMESH%nn_internal) then
2771      ! consider only internal part of BTmat
2772      if (DEBUG >= 1) write(0,'(A)') 'DEBUG: hecmw_localmat_multmat: ignore external part of BTmat'
2773      BTmat%nr = hecMESH%nn_internal
2774      BTmat%nnz = BTmat%index(BTmat%nr)
2775    endif
2776    !
2777    call extract_BT_exp(BTmat, hecCOMM, BT_exp)
2778    if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: extract_BT_exp done'
2779    t1 = hecmw_wtime()
2780    if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (2) : ',t1-t0
2781    t0 = hecmw_wtime()
2782    !
2783    call prepare_column_info(hecMESH, BT_exp, exp_cols_index, exp_cols_item)
2784    if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: prepare column info done'
2785    t1 = hecmw_wtime()
2786    if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (3) : ',t1-t0
2787    t0 = hecmw_wtime()
2788    !
2789    call send_BT_exp_and_recv_BT_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew)
2790    if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: send BT_exp and recv BT_imp done'
2791    t1 = hecmw_wtime()
2792    if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (4) : ',t1-t0
2793    t0 = hecmw_wtime()
2794    call free_comm_table(hecCOMM)
2795    !
2796    call concat_BTmat_and_BT_imp(BTmat, BT_imp, BT_all)
2797    if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: concat BTmat and BT_imp into BT_all done'
2798    t1 = hecmw_wtime()
2799    if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (5) : ',t1-t0
2800    t0 = hecmw_wtime()
2801    call hecmw_localmat_free(BT_imp)
2802    !
2803    call multiply_mat_mat(BKmat, BT_all, BKTmat)
2804    if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: multiply BKmat and BT_all into BKTmat done'
2805    t1 = hecmw_wtime()
2806    if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (6) : ',t1-t0
2807    t0 = hecmw_wtime()
2808    call hecmw_localmat_free(BT_all)
2809    !
2810    if (hecMESH%n_neighbor_pe > 0) then
2811      hecMESH%n_node = hecMESHnew%n_node
2812      hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe
2813      deallocate(hecMESH%neighbor_pe)
2814      deallocate(hecMESH%import_index)
2815      deallocate(hecMESH%export_index)
2816      deallocate(hecMESH%import_item)
2817      deallocate(hecMESH%export_item)
2818      deallocate(hecMESH%node_ID)
2819      deallocate(hecMESH%global_node_ID)
2820      hecMESH%neighbor_pe => hecMESHnew%neighbor_pe
2821      hecMESH%import_index => hecMESHnew%import_index
2822      hecMESH%export_index => hecMESHnew%export_index
2823      hecMESH%import_item => hecMESHnew%import_item
2824      hecMESH%export_item => hecMESHnew%export_item
2825      hecMESH%node_ID => hecMESHnew%node_ID
2826      hecMESH%global_node_ID => hecMESHnew%global_node_ID
2827      if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: update hecMESH done'
2828      t1 = hecmw_wtime()
2829      if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (7) : ',t1-t0
2830    endif
2831  end subroutine hecmw_localmat_multmat
2832
2833  subroutine make_comm_table(BKmat, hecMESH, hecCOMM)
2834    use m_hecmw_comm_f
2835    implicit none
2836    type (hecmwST_local_matrix), intent(in) :: BKmat
2837    type (hecmwST_local_mesh), intent(in) :: hecMESH
2838    type (hecmwST_matrix_comm), intent(out) :: hecCOMM
2839    integer(kind=kint) :: nn_int, nn_ext, nnb, i, icol, irank, idom, idx, n_send, tag, js, je, len
2840    integer(kind=kint), allocatable :: is_nz_col(:), imp_cnt(:), exp_cnt(:), import_item_remote(:)
2841    integer(kind=kint), allocatable :: requests(:), statuses(:,:)
2842    hecCOMM%zero = hecMESH%zero
2843    hecCOMM%HECMW_COMM = hecMESH%MPI_COMM
2844    hecCOMM%PETOT = hecMESH%PETOT
2845    hecCOMM%PEsmpTOT = hecMESH%PEsmpTOT
2846    hecCOMM%my_rank = hecMESH%my_rank
2847    hecCOMM%errnof = hecMESH%errnof
2848    hecCOMM%n_subdomain = hecMESH%n_subdomain
2849    hecCOMM%n_neighbor_pe = hecMESH%n_neighbor_pe
2850    allocate(hecCOMM%neighbor_pe(hecCOMM%n_neighbor_pe))
2851    hecCOMM%neighbor_pe(:) = hecMESH%neighbor_pe(:)
2852    !
2853    nn_int = hecMESH%nn_internal
2854    nn_ext = hecMESH%n_node - hecMESH%nn_internal
2855    nnb = hecCOMM%n_neighbor_pe
2856    !
2857    ! check_external_nz_cols (by profile (not number))
2858    allocate(is_nz_col(nn_ext))
2859    is_nz_col(:) = 0
2860    do i = 1, BKmat%index(nn_int)
2861      icol = BKmat%item(i)
2862      if (icol > nn_int) is_nz_col(icol - nn_int) = 1
2863    enddo
2864    !
2865    ! count_nz_cols_per_rank
2866    allocate(imp_cnt(nnb))
2867    imp_cnt(:) = 0
2868    do i = 1, nn_ext
2869      if (is_nz_col(i) == 1) then
2870        irank = hecMESH%node_ID(2*(nn_int+i))
2871        call rank_to_idom(hecMESH, irank, idom)
2872        imp_cnt(idom) = imp_cnt(idom) + 1
2873      endif
2874    enddo
2875    if (DEBUG >= 3) write(0,*) '    DEBUG3: imp_cnt',imp_cnt(:)
2876    !
2877    ! make_index
2878    allocate(hecCOMM%import_index(0:nnb))
2879    call make_index(nnb, imp_cnt, hecCOMM%import_index)
2880    if (DEBUG >= 3) write(0,*) '    DEBUG3: import_index',hecCOMM%import_index(:)
2881    !
2882    ! fill item
2883    allocate(hecCOMM%import_item(hecCOMM%import_index(nnb)))
2884    imp_cnt(:) = 0
2885    do i = 1, nn_ext
2886      if (is_nz_col(i) == 1) then
2887        irank = hecMESH%node_ID(2*(nn_int+i))
2888        call rank_to_idom(hecMESH, irank, idom)
2889        imp_cnt(idom) = imp_cnt(idom) + 1
2890        idx = hecCOMM%import_index(idom-1)+imp_cnt(idom)
2891        hecCOMM%import_item(idx) = nn_int+i
2892      endif
2893    enddo
2894    if (DEBUG >= 3) write(0,*) '    DEBUG3: import_item',hecCOMM%import_item(:)
2895    !
2896    allocate(import_item_remote(hecCOMM%import_index(nnb)))
2897    do i = 1, hecCOMM%import_index(nnb)
2898      import_item_remote(i) = hecMESH%node_ID(2*hecCOMM%import_item(i)-1)
2899    enddo
2900    if (DEBUG >= 3) write(0,*) '    DEBUG3: import_item_remote',import_item_remote(:)
2901    !
2902    allocate(requests(2*nnb))
2903    allocate(statuses(HECMW_STATUS_SIZE, 2*nnb))
2904    !
2905    ! send/recv
2906    n_send = 0
2907    do idom = 1, nnb
2908      irank = hecCOMM%neighbor_pe(idom)
2909      n_send = n_send + 1
2910      tag = 6001
2911      call HECMW_ISEND_INT(imp_cnt(idom), 1, irank, tag, hecCOMM%HECMW_COMM, requests(n_send))
2912      if (imp_cnt(idom) > 0) then
2913        js = hecCOMM%import_index(idom-1)+1
2914        je = hecCOMM%import_index(idom)
2915        len = je-js+1
2916        n_send = n_send + 1
2917        tag = 6002
2918        call HECMW_ISEND_INT(import_item_remote(js:je), len, irank, tag, &
2919             hecCOMM%HECMW_COMM, requests(n_send))
2920      endif
2921    enddo
2922    !
2923    ! index
2924    allocate(exp_cnt(nnb))
2925    do idom = 1, nnb
2926      irank = hecCOMM%neighbor_pe(idom)
2927      tag = 6001
2928      call HECMW_RECV_INT(exp_cnt(idom), 1, irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
2929    enddo
2930    allocate(hecCOMM%export_index(0:nnb))
2931    call make_index(nnb, exp_cnt, hecCOMM%export_index)
2932    if (DEBUG >= 3) write(0,*) '    DEBUG3: export_index',hecCOMM%export_index(:)
2933    !
2934    ! item
2935    allocate(hecCOMM%export_item(hecCOMM%export_index(nnb)))
2936    do idom = 1, nnb
2937      if (exp_cnt(idom) <= 0) cycle
2938      irank = hecCOMM%neighbor_pe(idom)
2939      js = hecCOMM%export_index(idom-1)+1
2940      je = hecCOMM%export_index(idom)
2941      len = je-js+1
2942      tag = 6002
2943      call HECMW_RECV_INT(hecCOMM%export_item(js:je), len, irank, tag, &
2944           hecCOMM%HECMW_COMM, statuses(:,1))
2945    enddo
2946    if (DEBUG >= 3) write(0,*) '    DEBUG3: export_item',hecCOMM%export_item(:)
2947    call HECMW_Waitall(n_send, requests, statuses)
2948    !
2949    deallocate(imp_cnt)
2950    deallocate(exp_cnt)
2951    deallocate(import_item_remote)
2952  end subroutine make_comm_table
2953
2954  subroutine free_comm_table(hecCOMM)
2955    implicit none
2956    type (hecmwST_matrix_comm), intent(inout) :: hecCOMM
2957    deallocate(hecCOMM%neighbor_pe)
2958    deallocate(hecCOMM%import_index)
2959    deallocate(hecCOMM%import_item)
2960    deallocate(hecCOMM%export_index)
2961    deallocate(hecCOMM%export_item)
2962  end subroutine free_comm_table
2963
2964  subroutine extract_BT_exp(BTmat, hecCOMM, BT_exp)
2965    implicit none
2966    type (hecmwST_local_matrix), intent(in) :: BTmat
2967    type (hecmwST_matrix_comm), intent(in) :: hecCOMM
2968    type (hecmwST_local_matrix), allocatable, intent(out) :: BT_exp(:)
2969    integer(kind=kint) :: ndof, ndof2, idom, idx_0, idx_n, j, jrow, nnz_row, idx, ks, ke, k
2970    if (hecCOMM%n_neighbor_pe == 0) return
2971    allocate(BT_exp(hecCOMM%n_neighbor_pe))
2972    ndof = BTmat%ndof
2973    ndof2 = ndof * ndof
2974    do idom = 1, hecCOMM%n_neighbor_pe
2975      idx_0 = hecCOMM%export_index(idom-1)
2976      idx_n = hecCOMM%export_index(idom)
2977      BT_exp(idom)%nr = idx_n - idx_0
2978      BT_exp(idom)%nc = BTmat%nc
2979      BT_exp(idom)%nnz = 0
2980      BT_exp(idom)%ndof = ndof
2981      allocate(BT_exp(idom)%index(0:BT_exp(idom)%nr))
2982      BT_exp(idom)%index(0) = 0
2983      do j = 1, BT_exp(idom)%nr
2984        jrow = hecCOMM%export_item(idx_0 + j)
2985        nnz_row = BTmat%index(jrow) - BTmat%index(jrow-1)
2986        BT_exp(idom)%index(j) = BT_exp(idom)%index(j-1) + nnz_row
2987      enddo
2988      BT_exp(idom)%nnz = BT_exp(idom)%index(BT_exp(idom)%nr)
2989      allocate(BT_exp(idom)%item(BT_exp(idom)%nnz))
2990      allocate(BT_exp(idom)%A(ndof2 * BT_exp(idom)%nnz))
2991      idx = 0
2992      do j = 1, BT_exp(idom)%nr
2993        jrow = hecCOMM%export_item(idx_0 + j)
2994        ks = BTmat%index(jrow-1) + 1
2995        ke = BTmat%index(jrow)
2996        do k = ks, ke
2997          idx = idx + 1
2998          BT_exp(idom)%item(idx) = BTmat%item(k)
2999          BT_exp(idom)%A(ndof2*(idx-1)+1:ndof2*idx) = BTmat%A(ndof2*(k-1)+1:ndof2*k)
3000        enddo
3001        if (idx /= BT_exp(idom)%index(j)) stop 'ERROR: extract BT_exp'
3002      enddo
3003    enddo
3004  end subroutine extract_BT_exp
3005
3006  subroutine send_BT_exp_and_recv_BT_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew)
3007    use m_hecmw_comm_f
3008    implicit none
3009    type (hecmwST_local_mesh), intent(in) :: hecMESH
3010    type (hecmwST_matrix_comm), intent(in) :: hecCOMM
3011    type (hecmwST_local_matrix), allocatable, intent(inout) :: BT_exp(:)
3012    integer(kind=kint), allocatable, intent(inout) :: exp_cols_index(:)
3013    integer(kind=kint), allocatable, intent(inout) :: exp_cols_item(:,:)
3014    type (hecmwST_local_matrix), intent(out) :: BT_imp
3015    type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
3016    integer(kind=kint), allocatable :: nnz_imp(:), cnt(:), index_imp(:)
3017    integer(kind=kint), allocatable :: imp_cols_index(:)
3018    integer(kind=kint), allocatable :: imp_cols_item(:,:)
3019    real(kind=kreal), allocatable :: imp_vals_item(:)
3020    integer(kind=kint) :: nnb, ndof, ndof2, idom, irank, nr, n_send, tag, idx_0, idx_n, j, jj, nnz
3021    integer(kind=kint), allocatable :: requests(:)
3022    integer(kind=kint), allocatable :: statuses(:,:)
3023    integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
3024    integer(kind=kint) :: n_add_node, i0
3025    nnb = hecCOMM%n_neighbor_pe
3026    if (nnb == 0) then
3027      BT_imp%nr = 0
3028      BT_imp%nc = 0
3029      BT_imp%nnz = 0
3030      BT_imp%ndof = 0
3031      allocate(BT_imp%index(0:0))
3032      BT_imp%index(0) = 0
3033      return
3034    endif
3035    ndof = BT_exp(1)%ndof
3036    ndof2 = ndof*ndof
3037    allocate(requests(nnb*3))
3038    allocate(statuses(HECMW_STATUS_SIZE, nnb*3))
3039    n_send = 0
3040    do idom = 1, nnb
3041      irank = hecCOMM%neighbor_pe(idom)
3042      nr = BT_exp(idom)%nr
3043      if (nr == 0) cycle
3044      n_send = n_send + 1
3045      tag = 3001
3046      call HECMW_ISEND_INT(BT_exp(idom)%index(0:BT_exp(idom)%nr), BT_exp(idom)%nr + 1, &
3047           irank, tag, hecCOMM%HECMW_COMM, requests(n_send))
3048      if (BT_exp(idom)%nnz == 0) cycle
3049      n_send = n_send + 1
3050      tag = 3002
3051      call HECMW_ISEND_INT(exp_cols_item(1,exp_cols_index(idom-1)+1), &
3052           cNCOL_ITEM * BT_exp(idom)%nnz, irank, tag, hecCOMM%HECMW_COMM, requests(n_send))
3053      n_send = n_send + 1
3054      tag = 3003
3055      call HECMW_ISEND_R(BT_exp(idom)%A, ndof2 * BT_exp(idom)%nnz, &
3056           irank, tag, hecCOMM%HECMW_COMM, requests(n_send))
3057    enddo
3058    !
3059    ! BT_imp%nr = hecCOMM%import_index(nnb)
3060    BT_imp%nr = hecMESH%n_node - hecMESH%nn_internal
3061    BT_imp%nc = 0  !!! TEMPORARY
3062    BT_imp%nnz = 0
3063    BT_imp%ndof = ndof
3064    !
3065    allocate(nnz_imp(nnb))
3066    allocate(cnt(BT_imp%nr))
3067    !
3068    cnt(:) = 0
3069    do idom = 1, nnb
3070      irank = hecCOMM%neighbor_pe(idom)
3071      idx_0 = hecCOMM%import_index(idom-1)
3072      idx_n = hecCOMM%import_index(idom)
3073      nr = idx_n - idx_0
3074      if (nr == 0) then
3075        nnz_imp(idom) = 0
3076        cycle
3077      endif
3078      allocate(index_imp(0:nr))
3079      tag = 3001
3080      call HECMW_RECV_INT(index_imp(0:nr), nr+1, irank, tag, &
3081           hecCOMM%HECMW_COMM, statuses(:,1))
3082      nnz_imp(idom) = index_imp(nr)
3083      do j = 1, nr
3084        jj = hecCOMM%import_item(idx_0 + j) - hecMESH%nn_internal
3085        if (jj < 1 .or. BT_imp%nr < jj) stop 'ERROR: jj out of range'
3086        if (cnt(jj) /= 0) stop 'ERROR: duplicate import rows?'
3087        cnt(jj) = index_imp(j) - index_imp(j-1)
3088      enddo
3089      deallocate(index_imp)
3090    enddo
3091    !
3092    allocate(imp_cols_index(0:nnb))
3093    call make_index(nnb, nnz_imp, imp_cols_index)
3094    deallocate(nnz_imp)
3095    !
3096    allocate(BT_imp%index(0:BT_imp%nr))
3097    call make_index(BT_imp%nr, cnt, BT_imp%index)
3098    deallocate(cnt)
3099    !
3100    BT_imp%nnz = BT_imp%index(BT_imp%nr)
3101    if (BT_imp%nnz /= imp_cols_index(nnb)) &
3102         stop 'ERROR: total num of nonzero of BT_imp'
3103    !
3104    allocate(imp_cols_item(cNCOL_ITEM, BT_imp%nnz))
3105    allocate(imp_vals_item(ndof2 * BT_imp%nnz))
3106    !
3107    do idom = 1, nnb
3108      irank = hecCOMM%neighbor_pe(idom)
3109      idx_0 = imp_cols_index(idom-1)
3110      idx_n = imp_cols_index(idom)
3111      nnz = idx_n - idx_0
3112      if (nnz == 0) cycle
3113      tag = 3002
3114      call HECMW_RECV_INT(imp_cols_item(1, idx_0 + 1), cNCOL_ITEM * nnz, &
3115           irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
3116      tag = 3003
3117      call HECMW_RECV_R(imp_vals_item(ndof2*idx_0 + 1), ndof2 * nnz, &
3118           irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
3119    enddo
3120    call HECMW_Waitall(n_send, requests, statuses)
3121    if (DEBUG >= 2) write(0,*) '  DEBUG2: send BT_imp and recv into temporary data done'
3122    !
3123    deallocate(requests)
3124    deallocate(statuses)
3125    !
3126    do idom = 1, nnb
3127      call hecmw_localmat_free(BT_exp(idom))
3128    enddo
3129    deallocate(BT_exp)
3130    deallocate(exp_cols_index)
3131    deallocate(exp_cols_item)
3132    !
3133    call copy_mesh(hecMESH, hecMESHnew)
3134    !
3135    call map_imported_cols(hecMESHnew, imp_cols_index(nnb), imp_cols_item, n_add_node, add_nodes, map, i0)
3136    if (DEBUG >= 2) write(0,*) '  DEBUG2: map imported cols done'
3137    !
3138    call update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
3139    if (DEBUG >= 2) write(0,*) '  DEBUG2: update comm_table done'
3140    !
3141    BT_imp%nc = hecMESHnew%n_node
3142    !
3143    allocate(BT_imp%item(BT_imp%nnz))
3144    allocate(BT_imp%A(ndof2 * BT_imp%nnz))
3145    call copy_vals_to_BT_imp(hecCOMM, hecMESH%nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3146    if (DEBUG >= 2) write(0,*) '  DEBUG2: copy vals to BT_imp done'
3147    !
3148    deallocate(imp_cols_index)
3149    deallocate(imp_cols_item)
3150    deallocate(imp_vals_item)
3151    deallocate(map)
3152  end subroutine send_BT_exp_and_recv_BT_imp
3153
3154  subroutine copy_vals_to_BT_imp(hecCOMM, nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3155    implicit none
3156    type (hecmwST_matrix_comm), intent(in) :: hecCOMM
3157    integer(kind=kint), intent(in) :: nn_internal
3158    integer(kind=kint), allocatable, intent(in) :: imp_cols_index(:)
3159    integer(kind=kint), intent(in) :: map(:)
3160    real(kind=kreal), intent(in) :: imp_vals_item(:)
3161    type (hecmwST_local_matrix), intent(inout) :: BT_imp
3162    integer(kind=kint) :: nnb, ndof2, idx, idom, idx_0, idx_n, nr, j, jrow, ks, ke, k
3163    nnb = hecCOMM%n_neighbor_pe
3164    ndof2 = BT_imp%ndof ** 2
3165    idx = 0
3166    do idom = 1, nnb
3167      idx_0 = hecCOMM%import_index(idom-1)
3168      idx_n = hecCOMM%import_index(idom)
3169      nr = idx_n - idx_0
3170      if (nr == 0) cycle
3171      do j = 1, nr
3172        jrow = hecCOMM%import_item(idx_0 + j) - nn_internal
3173        ks = BT_imp%index(jrow-1)+1
3174        ke = BT_imp%index(jrow)
3175        do k = ks, ke
3176          idx = idx + 1
3177          BT_imp%item(k) = map(idx)
3178          BT_imp%A(ndof2*(k-1)+1:ndof2*k) = imp_vals_item(ndof2*(idx-1)+1:ndof2*idx)
3179        enddo
3180      enddo
3181      if (idx /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_imp'
3182    enddo
3183  end subroutine copy_vals_to_BT_imp
3184
3185  subroutine concat_BTmat_and_BT_imp(BTmat, BT_imp, BT_all)
3186    implicit none
3187    type (hecmwST_local_matrix), intent(in) :: BTmat
3188    type (hecmwST_local_matrix), intent(in) :: BT_imp
3189    type (hecmwST_local_matrix), intent(out) :: BT_all
3190    integer(kind=kint) :: ndof, ndof2, i, ii
3191    ndof = BTmat%ndof
3192    if (BT_imp%nr > 0 .and. BT_imp%ndof /= ndof) stop 'ERROR: concat BTmat and BT_imp: ndof'
3193    ndof2 = ndof*ndof
3194    BT_all%nr = BTmat%nr + BT_imp%nr
3195    BT_all%nc = max(BTmat%nc, BT_imp%nc)
3196    BT_all%nnz = BTmat%nnz + BT_imp%nnz
3197    BT_all%ndof = ndof
3198    allocate(BT_all%index(0:BT_all%nr))
3199    allocate(BT_all%item(BT_all%nnz))
3200    allocate(BT_all%A(ndof2 * BT_all%nnz))
3201    BT_all%index(0) = 0
3202    do i = 1, BTmat%nr
3203      BT_all%index(i) = BTmat%index(i)
3204    enddo
3205    do i = 1, BT_imp%nr
3206      BT_all%index(BTmat%nr+i) = BT_all%index(BTmat%nr+i-1) + &
3207           BT_imp%index(i) - BT_imp%index(i-1)
3208    enddo
3209    do i = 1, BTmat%nnz
3210      BT_all%item(i) = BTmat%item(i)
3211      BT_all%A(ndof2*(i-1)+1:ndof2*i) = BTmat%A(ndof2*(i-1)+1:ndof2*i)
3212    enddo
3213    do i = 1, BT_imp%nnz
3214      ii = BTmat%nnz + i
3215      BT_all%item(ii) = BT_imp%item(i)
3216      BT_all%A(ndof2*(ii-1)+1:ndof2*ii) = BT_imp%A(ndof2*(i-1)+1:ndof2*i)
3217    enddo
3218  end subroutine concat_BTmat_and_BT_imp
3219
3220  subroutine multiply_mat_mat(Amat, Bmat, Cmat)
3221    implicit none
3222    type (hecmwST_local_matrix), intent(in) :: Amat
3223    type (hecmwST_local_matrix), intent(in) :: Bmat
3224    type (hecmwST_local_matrix), intent(out) :: Cmat
3225    integer(kind=kint) :: ndof, ndof2, nr, nc, nnz, i, icnt
3226    integer(kind=kint) :: js, je, j, jj, ks, ke, k, kk, l, ll, l0
3227    integer(kind=kint), allocatable :: iw(:)
3228    real(kind=kreal), pointer :: Ap(:), Bp(:), Cp(:)
3229    real(kind=kreal) :: t0, t1
3230    t0 = hecmw_wtime()
3231    if (Amat%ndof /= Bmat%ndof) stop 'ERROR: multiply_mat_mat: unmatching ndof'
3232    ndof = Amat%ndof
3233    ndof2 = ndof*ndof
3234    nr = Amat%nr
3235    nc = Bmat%nc
3236    if (Amat%nc /= Bmat%nr) then
3237      write(0,*) 'Amat: nr, nc = ', Amat%nr, Amat%nc
3238      write(0,*) 'Bmat: nr, nc = ', Bmat%nr, Bmat%nc
3239      stop 'ERROR: multiply_mat_mat: unmatching size'
3240    endif
3241    Cmat%ndof = ndof
3242    Cmat%nr = nr
3243    Cmat%nc = nc
3244    allocate(Cmat%index(0:nr))
3245    Cmat%index(0) = 0
3246    !$omp parallel default(none), &
3247      !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,l), &
3248      !$omp& shared(nr,nc,Amat,Bmat,Cmat)
3249    allocate(iw(nc))
3250    !$omp do
3251    do i = 1, nr
3252      icnt = 0
3253      js = Amat%index(i-1)+1
3254      je = Amat%index(i)
3255      do j = js, je
3256        jj = Amat%item(j)
3257        ks = Bmat%index(jj-1)+1
3258        ke = Bmat%index(jj)
3259        kl1: do k = ks, ke
3260          kk = Bmat%item(k)
3261          do l = 1, icnt
3262            if (iw(l) == kk) cycle kl1
3263          enddo
3264          icnt = icnt + 1
3265          iw(icnt) = kk
3266        enddo kl1
3267      enddo
3268      Cmat%index(i) = icnt
3269    enddo
3270    !$omp end do
3271    deallocate(iw)
3272    !$omp end parallel
3273    do i = 1, nr
3274      Cmat%index(i) = Cmat%index(i-1) + Cmat%index(i)
3275    enddo
3276    nnz = Cmat%index(nr)
3277    Cmat%nnz = nnz
3278    !write(0,*) 'nnz',nnz
3279    t1 = hecmw_wtime()
3280    if (TIMER >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (1) : ",t1-t0
3281    t0 = hecmw_wtime()
3282    allocate(Cmat%item(nnz))
3283    allocate(Cmat%A(ndof2 * nnz))
3284    Cmat%A(:) = 0.0d0
3285    !$omp parallel default(none), &
3286      !$omp& private(i,icnt,l0,js,je,j,jj,Ap,ks,ke,k,kk,Bp,ll,l,Cp), &
3287      !$omp& shared(nr,Cmat,Amat,Bmat,ndof2,ndof)
3288    !$omp do
3289    do i = 1, nr
3290      icnt = 0
3291      l0 = Cmat%index(i-1)
3292      ! item
3293      js = Amat%index(i-1)+1
3294      je = Amat%index(i)
3295      do j = js, je
3296        jj = Amat%item(j)
3297        Ap => Amat%A(ndof2*(j-1)+1:ndof2*j)
3298        ks = Bmat%index(jj-1)+1
3299        ke = Bmat%index(jj)
3300        do k = ks, ke
3301          kk = Bmat%item(k)
3302          Bp => Bmat%A(ndof2*(k-1)+1:ndof2*k)
3303          ll = -1
3304          do l = 1, icnt
3305            if (Cmat%item(l0+l) == kk) then
3306              ll = l0 + l
3307              exit
3308            endif
3309          enddo
3310          if (ll < 0) then
3311            icnt = icnt + 1
3312            ll = l0 + icnt
3313            Cmat%item(ll) = kk
3314          endif
3315          Cp => Cmat%A(ndof2*(ll-1)+1:ndof2*ll)
3316          call blk_matmul_add(ndof, Ap, Bp, Cp)
3317        enddo
3318      enddo
3319      !write(0,*) 'l0,icnt,index(i)',Cmat%index(i-1),icnt,Cmat%index(i)
3320      if (l0+icnt /= Cmat%index(i)) stop 'ERROR: multiply_mat_mat: unknown error'
3321    enddo
3322    !$omp end do
3323    !$omp end parallel
3324    t1 = hecmw_wtime()
3325    if (TIMER >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (2) : ",t1-t0
3326    t0 = hecmw_wtime()
3327    call sort_and_uniq_rows(Cmat)
3328    t1 = hecmw_wtime()
3329    if (TIMER >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (3) : ",t1-t0
3330  end subroutine multiply_mat_mat
3331
3332  subroutine blk_matmul_add(ndof, A, B, AB)
3333    implicit none
3334    integer, intent(in) :: ndof
3335    real(kind=kreal), intent(in) :: A(:), B(:)
3336    real(kind=kreal), intent(inout) :: AB(:)
3337    integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
3338    ndof2=ndof*ndof
3339    do i=1,ndof
3340      i0=(i-1)*ndof
3341      do j=1,ndof
3342        ij=i0+j
3343        j0=(j-1)*ndof
3344        do k=1,ndof
3345          ik=i0+k
3346          jk=j0+k
3347          !$omp atomic
3348          AB(ik)=AB(ik)+A(ij)*B(jk)
3349        enddo
3350      enddo
3351    enddo
3352  end subroutine blk_matmul_add
3353
3354  subroutine hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT)
3355    implicit none
3356    type (hecmwST_matrix), intent(in) :: hecMAT
3357    type (hecmwST_local_matrix), intent(in) :: BTtKTmat
3358    type (hecmwST_matrix), intent(inout) :: hecTKT
3359    call make_new_hecmat(hecMAT, BTtKTmat, hecTKT)
3360  end subroutine hecmw_localmat_make_hecmat
3361
3362  subroutine hecmw_localmat_shrink_comm_table(BKmat, hecMESH)
3363    implicit none
3364    type (hecmwST_local_matrix), intent(in) :: BKmat
3365    type (hecmwST_local_mesh), intent(inout) :: hecMESH
3366    type (hecmwST_matrix_comm) :: hecCOMM
3367    call make_comm_table(BKmat, hecMESH, hecCOMM)
3368    deallocate(hecMESH%import_index)
3369    deallocate(hecMESH%import_item)
3370    deallocate(hecMESH%export_index)
3371    deallocate(hecMESH%export_item)
3372    hecMESH%import_index => hecCOMM%import_index
3373    hecMESH%import_item => hecCOMM%import_item
3374    hecMESH%export_index => hecCOMM%export_index
3375    hecMESH%export_item => hecCOMM%export_item
3376    deallocate(hecCOMM%neighbor_pe)
3377  end subroutine hecmw_localmat_shrink_comm_table
3378
3379end module hecmw_local_matrix
3380