1!------------------------------------------------------------------------------- 2! Copyright (c) 2019 FrontISTR Commons 3! This software is released under the MIT License, see LICENSE.txt 4!------------------------------------------------------------------------------- 5 6module hecmw_local_matrix 7 use hecmw_util 8 use hecmw_pair_array 9 10 private 11 public :: hecmwST_local_matrix 12 public :: hecmw_localmat_write 13 public :: hecmw_localmat_blocking 14 public :: hecmw_localmat_free 15 public :: hecmw_localmat_mulvec 16 public :: hecmw_trimatmul_TtKT 17 public :: hecmw_trimatmul_TtKT_mpc 18 public :: hecmw_localmat_transpose 19 public :: hecmw_localmat_assemble 20 public :: hecmw_localmat_add 21 public :: hecmw_localmat_init_with_hecmat 22 public :: hecmw_localmat_add_hecmat 23 public :: hecmw_localmat_multmat 24 public :: hecmw_localmat_make_hecmat 25 public :: hecmw_localmat_shrink_comm_table 26 27 type hecmwST_local_matrix 28 integer :: nr, nc, nnz, ndof 29 integer(kind=kint), pointer :: index(:) 30 integer(kind=kint), pointer :: item(:) 31 real(kind=kreal), pointer :: A(:) 32 end type hecmwST_local_matrix 33 34 integer(kind=kint), parameter :: cNCOL_ITEM = 2 !< num of column items to be migrated (2 or 3) 35 integer(kind=kint), parameter :: cLID = 1 !< index for local ID in belonging rank (node_ID(2*i-1)) 36 integer(kind=kint), parameter :: cRANK = 2 !< index for belonging rank (node_ID(2*i)) 37 integer(kind=kint), parameter :: cGID = 3 !< index for global ID (used only when cNCOL_ITEM==3) 38 39 integer(kind=kint), parameter :: DEBUG = 0 40 integer(kind=kint), parameter :: TIMER = 0 41 42contains 43 44 subroutine hecmw_localmat_write(Tmat,iunit) 45 implicit none 46 type (hecmwST_local_matrix), intent(in) :: Tmat 47 integer(kind=kint), intent(in) :: iunit 48 integer(kind=kint) :: nr, nc, nnz, ndof, ndof2, i, js, je, j, jj 49 nr=Tmat%nr 50 nc=Tmat%nc 51 nnz=Tmat%nnz 52 ndof=Tmat%ndof 53 ndof2=ndof*ndof 54 write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof 55 write(iunit,*) 'i, j, A' 56 do i=1,nr 57 js=Tmat%index(i-1)+1 58 je=Tmat%index(i) 59 do j=js,je 60 jj=Tmat%item(j) 61 if (ndof==1) then 62 write(iunit,*) i, jj, Tmat%A(j) 63 else 64 write(iunit,*) i, jj 65 write(iunit,*) Tmat%A((j-1)*ndof2+1:j*ndof2) 66 endif 67 enddo 68 enddo 69 end subroutine hecmw_localmat_write 70 71 subroutine hecmw_localmat_write_ij(Tmat,iunit) 72 implicit none 73 type (hecmwST_local_matrix), intent(in) :: Tmat 74 integer(kind=kint), intent(in) :: iunit 75 integer(kind=kint) :: nr, nc, nnz, ndof, i, js, je, j, jj 76 nr=Tmat%nr 77 nc=Tmat%nc 78 nnz=Tmat%nnz 79 ndof=Tmat%ndof 80 write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof 81 write(iunit,*) 'i, j' 82 do i=1,nr 83 js=Tmat%index(i-1)+1 84 je=Tmat%index(i) 85 do j=js,je 86 jj=Tmat%item(j) 87 write(iunit,*) i, jj 88 enddo 89 enddo 90 end subroutine hecmw_localmat_write_ij 91 92 subroutine hecmw_localmat_blocking(Tmat, ndof, BTmat) 93 implicit none 94 type (hecmwST_local_matrix), intent(in) :: Tmat 95 integer, intent(in) :: ndof 96 type (hecmwST_local_matrix), intent(out) :: BTmat 97 integer, allocatable :: iw(:) 98 integer :: ndof2, i, icnt, idof, idx, ls, le, l, j, jb, k, lb0, jdof, ks, ke 99 ndof2=ndof*ndof 100 101 if (mod(Tmat%nr, ndof) /= 0 .or. mod(Tmat%nc, ndof) /= 0) then 102 write(0,*) Tmat%nr, Tmat%nc, ndof 103 stop 'ERROR: blocking_Tmat failed' 104 endif 105 BTmat%nr=Tmat%nr/ndof 106 BTmat%nc=Tmat%nc/ndof 107 BTmat%ndof=ndof 108 109 allocate(iw(BTmat%nc)) 110 allocate(BTmat%index(0:BTmat%nr)) 111 112 BTmat%index(0)=0 113 do i=1,BTmat%nr 114 icnt=0 115 do idof=1,ndof 116 idx=(i-1)*ndof+idof 117 ls=Tmat%index(idx-1)+1 118 le=Tmat%index(idx) 119 lcol: do l=ls,le 120 j=Tmat%item(l) 121 jb=(j-1)/ndof+1 122 do k=1,icnt 123 if (iw(k)==jb) cycle lcol 124 enddo 125 icnt=icnt+1 126 iw(icnt)=jb 127 enddo lcol 128 enddo 129 BTmat%index(i)=BTmat%index(i-1)+icnt 130 enddo 131 132 BTmat%nnz=BTmat%index(BTmat%nr) 133 allocate(BTmat%item(BTmat%nnz)) 134 allocate(BTmat%A(BTmat%nnz*ndof2)) 135 BTmat%A=0.d0 136 137 do i=1,BTmat%nr 138 icnt=0 139 do idof=1,ndof 140 idx=(i-1)*ndof+idof 141 ls=Tmat%index(idx-1)+1 142 le=Tmat%index(idx) 143 lcol2: do l=ls,le 144 j=Tmat%item(l) 145 jb=(j-1)/ndof+1 146 do k=1,icnt 147 if (iw(k)==jb) cycle lcol2 148 enddo 149 icnt=icnt+1 150 iw(icnt)=jb 151 enddo lcol2 152 enddo 153 ! if (icnt /= BTmat%index(i)-BTmat%index(i-1)) stop 'ERROR: blocking Tmat' 154 ! ! call qsort(iw, 1, icnt) 155 lb0=BTmat%index(i-1) 156 do k=1,icnt 157 BTmat%item(lb0+k)=iw(k) 158 enddo 159 do idof=1,ndof 160 idx=(i-1)*ndof+idof 161 ls=Tmat%index(idx-1)+1 162 le=Tmat%index(idx) 163 lcol3: do l=ls,le 164 j=Tmat%item(l) 165 jb=(j-1)/ndof+1 166 jdof=mod((j-1), ndof)+1 167 ks=BTmat%index(i-1)+1 168 ke=BTmat%index(i) 169 do k=ks,ke 170 if (BTmat%item(k)==jb) then 171 BTmat%A((k-1)*ndof2+(idof-1)*ndof+jdof)=Tmat%A(l) 172 cycle lcol3 173 endif 174 enddo 175 stop 'ERROR: something wrong in blocking Tmat' 176 enddo lcol3 177 enddo 178 enddo 179 end subroutine hecmw_localmat_blocking 180 181 subroutine hecmw_localmat_free(Tmat) 182 implicit none 183 type (hecmwST_local_matrix), intent(inout) :: Tmat 184 deallocate(Tmat%index) 185 if (associated(Tmat%item)) deallocate(Tmat%item) 186 if (associated(Tmat%A)) deallocate(Tmat%A) 187 Tmat%nr=0 188 Tmat%nc=0 189 Tmat%nnz=0 190 Tmat%ndof=0 191 end subroutine hecmw_localmat_free 192 193 subroutine hecmw_trimatmul_TtKT(hecMESH, BTtmat, hecMAT, BTmat, & 194 iwS, num_lagrange, hecTKT) 195 use hecmw_matrix_misc 196 implicit none 197 type (hecmwST_local_mesh), intent(inout) :: hecMESH 198 type (hecmwST_local_matrix), intent(inout) :: BTtmat, BTmat 199 type (hecmwST_matrix), intent(in) :: hecMAT 200 integer(kind=kint), intent(in) :: iwS(:) 201 integer(kind=kint), intent(in) :: num_lagrange 202 type (hecmwST_matrix), intent(inout) :: hecTKT 203 if (hecMESH%n_neighbor_pe == 0) then 204 call hecmw_trimatmul_TtKT_serial(hecMESH, BTtmat, hecMAT, BTmat, & 205 iwS, num_lagrange, hecTKT) 206 else 207 call hecmw_trimatmul_TtKT_parallel(hecMESH, BTtmat, hecMAT, BTmat, & 208 iwS, num_lagrange, hecTKT) 209 endif 210 end subroutine hecmw_trimatmul_TtKT 211 212 subroutine hecmw_trimatmul_TtKT_serial(hecMESH, BTtmat, hecMAT, BTmat, & 213 iwS, num_lagrange, hecTKT) 214 use hecmw_matrix_misc 215 implicit none 216 type (hecmwST_local_mesh), intent(in) :: hecMESH 217 type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat 218 type (hecmwST_matrix), intent(in) :: hecMAT 219 integer(kind=kint), intent(in) :: iwS(:) 220 integer(kind=kint), intent(in) :: num_lagrange 221 type (hecmwST_matrix), intent(inout) :: hecTKT 222 type (hecmwST_local_matrix) :: BTtKT 223 real(kind=kreal) :: num 224 225 ! perform three matrices multiplication for elimination 226 call trimatmul_TtKT(BTtmat, hecMAT, BTmat, BTtKT) 227 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)' 228 !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank()) 229 230 ! place small numbers where the DOF is eliminated 231 !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10 232 num = 1.d0 233 call place_num_on_diag(BTtKT, iwS, num_lagrange, num) 234 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)' 235 !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank()) 236 237 ! make_new HECMW matrix 238 call make_new_hecmat(hecMAT, BTtKT, hecTKT) 239 call hecmw_localmat_free(BTtKT) 240 end subroutine hecmw_trimatmul_TtKT_serial 241 242 subroutine hecmw_trimatmul_TtKT_parallel(hecMESH, BTtmat, hecMAT, BTmat, & 243 iwS, num_lagrange, hecTKT) 244 use hecmw_matrix_misc 245 implicit none 246 type (hecmwST_local_mesh), intent(inout) :: hecMESH 247 type (hecmwST_local_matrix), intent(inout) :: BTtmat, BTmat 248 type (hecmwST_matrix), intent(in) :: hecMAT 249 integer(kind=kint), intent(in) :: iwS(:) 250 integer(kind=kint), intent(in) :: num_lagrange 251 type (hecmwST_matrix), intent(inout) :: hecTKT 252 type (hecmwST_local_matrix) :: BKmat, BTtKmat, BTtKTmat 253 real(kind=kreal) :: num 254 real(kind=kreal) :: t0, t1 255 256 ! perform three matrices multiplication for elimination 257 t0 = hecmw_wtime() 258 call hecmw_localmat_init_with_hecmat(BKmat, hecMAT) 259 if (DEBUG >= 3) then 260 write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)' 261 if (DEBUG == 3) then 262 call hecmw_localmat_write_ij(BKmat, 700+hecmw_comm_get_rank()) 263 else 264 call hecmw_localmat_write(BKmat, 700+hecmw_comm_get_rank()) 265 endif 266 endif 267 t1 = hecmw_wtime() 268 if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (1) : ",t1-t0 269 270 t0 = hecmw_wtime() 271 call hecmw_localmat_multmat(BTtmat, BKmat, hecMESH, BTtKmat) 272 if (DEBUG >= 2) write(0,*) ' DEBUG2: multiply Tt and K done' 273 if (DEBUG >= 3) then 274 write(700+hecmw_comm_get_rank(),*) 'BTtKmat' 275 if (DEBUG == 3) then 276 call hecmw_localmat_write_ij(BTtKmat, 700+hecmw_comm_get_rank()) 277 else 278 call hecmw_localmat_write(BTtKmat, 700+hecmw_comm_get_rank()) 279 endif 280 endif 281 call hecmw_localmat_free(BKmat) 282 t1 = hecmw_wtime() 283 if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (2) : ",t1-t0 284 285 t0 = hecmw_wtime() 286 call hecmw_localmat_multmat(BTtKmat, BTmat, hecMESH, BTtKTmat) 287 if (DEBUG >= 2) write(0,*) ' DEBUG2: multiply TtK and T done' 288 if (DEBUG >= 3) then 289 write(700+hecmw_comm_get_rank(),*) 'BTtKTmat' 290 if (DEBUG == 3) then 291 call hecmw_localmat_write_ij(BTtKTmat, 700+hecmw_comm_get_rank()) 292 else 293 call hecmw_localmat_write(BTtKTmat, 700+hecmw_comm_get_rank()) 294 endif 295 endif 296 call hecmw_localmat_free(BTtKmat) 297 t1 = hecmw_wtime() 298 if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (3) : ",t1-t0 299 300 t0 = hecmw_wtime() 301 ! place small numbers where the DOF is eliminated 302 !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10 303 num = 1.d0 304 call place_num_on_diag(BTtKTmat, iwS, num_lagrange, num) 305 if (DEBUG >= 3) then 306 write(700+hecmw_comm_get_rank(),*) 'num_lagrange =', num_lagrange 307 write(700+hecmw_comm_get_rank(),*) 'iwS(1:num_lagrange)' 308 write(700+hecmw_comm_get_rank(),*) iwS(1:num_lagrange) 309 write(700+hecmw_comm_get_rank(),*) 'BTtKTmat (place 1.0 on slave diag)' 310 if (DEBUG == 3) then 311 call hecmw_localmat_write_ij(BTtKTmat, 700+hecmw_comm_get_rank()) 312 else 313 call hecmw_localmat_write(BTtKTmat, 700+hecmw_comm_get_rank()) 314 endif 315 endif 316 t1 = hecmw_wtime() 317 if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (4) : ",t1-t0 318 319 t0 = hecmw_wtime() 320 ! make_new HECMW matrix 321 call make_new_hecmat(hecMAT, BTtKTmat, hecTKT) 322 call hecmw_localmat_free(BTtKTmat) 323 t1 = hecmw_wtime() 324 if (TIMER >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (5) : ",t1-t0 325 end subroutine hecmw_trimatmul_TtKT_parallel 326 327 subroutine trimatmul_TtKT(BTtmat, hecMAT, BTmat, BTtKT) 328 implicit none 329 type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat 330 type (hecmwST_matrix), intent(in) :: hecMAT 331 type (hecmwST_local_matrix), intent(out) :: BTtKT 332 integer :: nr, nc, ndof, ndof2, i, icnt, js, je, j, jj, ks, ke, k, kk 333 integer :: ls, le, l, ll, m, ms, me, mm 334 integer, allocatable :: iw(:) 335 real(kind=kreal), pointer :: Ttp(:), Kp(:), Tp(:), TtKTp(:) 336 ! real(kind=kreal) :: tsym_s, tsym_e, tnum_s, tnum_e 337 338 nr=BTtmat%nr 339 nc=BTmat%nc 340 ndof=BTtmat%ndof 341 ndof2=ndof*ndof 342 343 BTtKT%nr=nr 344 BTtKT%nc=nc 345 BTtKT%ndof=ndof 346 allocate(BTtKT%index(0:nr)) 347 348 ! tsym_s = hecmw_wtime() 349 350 !$omp parallel default(none), & 351 !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m), & 352 !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT) 353 allocate(iw(nc)) 354 !$omp do 355 do i=1,nr 356 icnt=0 357 js=BTtmat%index(i-1)+1 358 je=BTtmat%index(i) 359 do j=js,je 360 jj=BTtmat%item(j) 361 ! lower 362 ks=hecMAT%indexL(jj-1)+1 363 ke=hecMAT%indexL(jj) 364 do k=ks,ke 365 kk=hecMAT%itemL(k) 366 ls=BTmat%index(kk-1)+1 367 le=BTmat%index(kk) 368 ll1: do l=ls,le 369 ll=BTmat%item(l) 370 do m=1,icnt 371 if (iw(m)==ll) cycle ll1 372 enddo 373 icnt=icnt+1 374 iw(icnt)=ll 375 !if (i==1) write(0,*) 'l', icnt, jj, kk, ll 376 enddo ll1 377 enddo 378 ! diagonal 379 ls=BTmat%index(jj-1)+1 380 le=BTmat%index(jj) 381 ll2: do l=ls,le 382 ll=BTmat%item(l) 383 do m=1,icnt 384 if (iw(m)==ll) cycle ll2 385 enddo 386 icnt=icnt+1 387 iw(icnt)=ll 388 !if (i==1) write(0,*) 'd', icnt, jj, kk, ll 389 enddo ll2 390 ! upper 391 ks=hecMAT%indexU(jj-1)+1 392 ke=hecMAT%indexU(jj) 393 do k=ks,ke 394 kk=hecMAT%itemU(k) 395 ls=BTmat%index(kk-1)+1 396 le=BTmat%index(kk) 397 ll3: do l=ls,le 398 ll=BTmat%item(l) 399 do m=1,icnt 400 if (iw(m)==ll) cycle ll3 401 enddo 402 icnt=icnt+1 403 iw(icnt)=ll 404 !if (i==1) write(0,*) 'u', icnt, jj, kk, ll 405 enddo ll3 406 enddo 407 enddo 408 if (icnt == 0) icnt=1 409 !if (i==1) write(0,*) iw(1:icnt) 410 BTtKT%index(i)=icnt 411 enddo 412 !$omp end do 413 deallocate(iw) 414 !$omp end parallel 415 416 ! tsym_e = hecmw_wtime() 417 ! write(0,*) 'tsym:',tsym_e-tsym_s 418 419 BTtKT%index(0)=0 420 do i=1,nr 421 BTtKT%index(i)=BTtKT%index(i-1)+BTtKT%index(i) 422 enddo 423 !write(0,*) BTtKT%index(1:n)-BTtKT%index(0:n-1) 424 425 BTtKT%nnz=BTtKT%index(nr) 426 allocate(BTtKT%item(BTtKT%nnz)) 427 allocate(BTtKT%A(BTtKT%nnz*ndof2)) 428 BTtKT%item=0 429 BTtKT%A=0.d0 430 431 ! tnum_s = hecmw_wtime() 432 433 !$omp parallel default(none), & 434 !$omp& private(i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m, & 435 !$omp& ms,me,mm,Ttp,Kp,Tp,TtKTp), & 436 !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT,ndof,ndof2) 437 !$omp do 438 do i=1,nr 439 icnt=0 440 ms=BTtKT%index(i-1)+1 441 !me=BTtKT%index(i) 442 js=BTtmat%index(i-1)+1 443 je=BTtmat%index(i) 444 do j=js,je 445 jj=BTtmat%item(j) 446 Ttp=>BTtmat%A((j-1)*ndof2+1:j*ndof2) 447 ! lower 448 ks=hecMAT%indexL(jj-1)+1 449 ke=hecMAT%indexL(jj) 450 do k=ks,ke 451 kk=hecMAT%itemL(k) 452 Kp=>hecMAT%AL((k-1)*ndof2+1:k*ndof2) 453 ls=BTmat%index(kk-1)+1 454 le=BTmat%index(kk) 455 do l=ls,le 456 ll=BTmat%item(l) 457 Tp=>BTmat%A((l-1)*ndof2+1:l*ndof2) 458 me=ms-1+icnt 459 mm=-1 460 do m=ms,me 461 if (BTtKT%item(m)==ll) mm=m 462 enddo 463 if (mm<0) then 464 icnt=icnt+1 465 mm=me+1 466 BTtKT%item(mm)=ll 467 !if (i==1) write(0,*) 'l', mm, jj, kk, ll 468 endif 469 TtKTp=>BTtKT%A((mm-1)*ndof2+1:mm*ndof2) 470 call blk_trimatmul_add(ndof, Ttp, Kp, Tp, TtKTp) 471 enddo 472 enddo 473 ! diagonal 474 Kp=>hecMAT%D((jj-1)*ndof2+1:jj*ndof2) 475 ls=BTmat%index(jj-1)+1 476 le=BTmat%index(jj) 477 do l=ls,le 478 ll=BTmat%item(l) 479 Tp=>BTmat%A((l-1)*ndof2+1:l*ndof2) 480 me=ms-1+icnt 481 mm=-1 482 do m=ms,me 483 if (BTtKT%item(m)==ll) mm=m 484 enddo 485 if (mm<0) then 486 icnt=icnt+1 487 mm=me+1 488 BTtKT%item(mm)=ll 489 !if (i==1) write(0,*) 'd', mm, jj, kk, ll 490 endif 491 TtKTp=>BTtKT%A((mm-1)*ndof2+1:mm*ndof2) 492 call blk_trimatmul_add(ndof, Ttp, Kp, Tp, TtKTp) 493 enddo 494 ! upper 495 ks=hecMAT%indexU(jj-1)+1 496 ke=hecMAT%indexU(jj) 497 do k=ks,ke 498 kk=hecMAT%itemU(k) 499 Kp=>hecMAT%AU((k-1)*ndof2+1:k*ndof2) 500 ls=BTmat%index(kk-1)+1 501 le=BTmat%index(kk) 502 do l=ls,le 503 ll=BTmat%item(l) 504 Tp=>BTmat%A((l-1)*ndof2+1:l*ndof2) 505 me=ms-1+icnt 506 mm=-1 507 do m=ms,me 508 if (BTtKT%item(m)==ll) mm=m 509 enddo 510 if (mm<0) then 511 icnt=icnt+1 512 mm=me+1 513 BTtKT%item(mm)=ll 514 !if (i==1) write(0,*) 'u', mm, jj, kk, ll 515 endif 516 TtKTp=>BTtKT%A((mm-1)*ndof2+1:mm*ndof2) 517 call blk_trimatmul_add(ndof, Ttp, Kp, Tp, TtKTp) 518 enddo 519 enddo 520 enddo 521 if (icnt == 0) then 522 icnt=1 523 BTtKT%item(ms)=i 524 endif 525 ! error check! 526 !write(0,*) BTtKT%item(ms:ms-1+icnt) 527 !write(0,*) BTtKT%index(i)-BTtKT%index(i-1), icnt 528 if (ms-1+icnt /= BTtKT%index(i)) stop 'ERROR: trimatmul' 529 enddo 530 !$omp end do 531 !$omp end parallel 532 533 ! tnum_e = hecmw_wtime() 534 ! write(0,*) 'tnum:',tnum_e-tnum_s 535 end subroutine trimatmul_TtKT 536 537 subroutine blk_trimatmul_add(ndof, A, B, C, ABC) 538 implicit none 539 integer, intent(in) :: ndof 540 real(kind=kreal), intent(in) :: A(:), B(:), C(:) 541 real(kind=kreal), intent(inout) :: ABC(:) 542 real(kind=kreal), allocatable :: AB(:) 543 integer :: ndof2, i, j, k, i0, j0, ij, ik, jk 544 545 ndof2=ndof*ndof 546 allocate(AB(ndof2)) 547 AB=0.d0 548 549 do i=1,ndof 550 i0=(i-1)*ndof 551 do j=1,ndof 552 ij=i0+j 553 j0=(j-1)*ndof 554 do k=1,ndof 555 ik=i0+k 556 jk=j0+k 557 AB(ik)=AB(ik)+A(ij)*B(jk) 558 enddo 559 enddo 560 enddo 561 562 do i=1,ndof 563 i0=(i-1)*ndof 564 do j=1,ndof 565 ij=i0+j 566 j0=(j-1)*ndof 567 do k=1,ndof 568 ik=i0+k 569 jk=j0+k 570 ABC(ik)=ABC(ik)+AB(ij)*C(jk) 571 enddo 572 enddo 573 enddo 574 575 deallocate(AB) 576 end subroutine blk_trimatmul_add 577 578 subroutine place_num_on_diag(BTtKT, iwS, num_lagrange, num) 579 implicit none 580 type (hecmwST_local_matrix), intent(inout) :: BTtKT 581 integer(kind=kint), intent(in) :: iwS(:) 582 integer(kind=kint), intent(in) :: num_lagrange 583 real(kind=kreal), intent(in) :: num 584 integer(kind=kint) :: ndof, ndof2, ilag, i, idof, js, je, j, jj 585 integer(kind=kint) :: nmissing, k, ks, ke 586 integer(kind=kint), allocatable :: missing(:), cnt(:) 587 integer(kind=kint), pointer :: index(:), item(:) 588 real(kind=kreal), pointer :: A(:) 589 590 ndof=BTtKT%ndof 591 ndof2=ndof*ndof 592 593 ! check if there are places 594 allocate(missing(num_lagrange)) 595 nmissing = 0 596 outer1: do ilag=1,num_lagrange 597 i=(iwS(ilag)-1)/ndof+1 598 idof=mod(iwS(ilag)-1, ndof)+1 599 js=BTtKT%index(i-1)+1 600 je=BTtKT%index(i) 601 do j=js,je 602 jj=BTtKT%item(j) 603 if (jj==i) cycle outer1 ! found place 604 enddo 605 ! not found 606 do k=1,nmissing 607 if (missing(k) == i) cycle outer1 ! already marked as missing 608 enddo 609 nmissing = nmissing + 1 610 missing(nmissing) = i 611 enddo outer1 612 613 ! if not, reallocate 614 if (nmissing > 0) then 615 allocate(cnt(BTtKT%nr)) 616 allocate(index(0:BTtKT%nr)) 617 do i=1,BTtKT%nr 618 cnt(i) = BTtKT%index(i) - BTtKT%index(i-1) 619 enddo 620 do i=1,nmissing 621 cnt(missing(i)) = cnt(missing(i)) + 1 622 enddo 623 call make_index(BTtKT%nr, cnt, index) 624 allocate(item(BTtKT%nnz + nmissing)) 625 allocate(A(ndof2 * (BTtKT%nnz + nmissing))) 626 do i=1,BTtKT%nr 627 ks=index(i-1)+1 628 js=BTtKT%index(i-1)+1 629 je=BTtKT%index(i) 630 item(ks:ks+(je-js))=BTtKT%item(js:je) 631 A(ndof2*(ks-1)+1:ndof2*(ks+(je-js)))=BTtKT%A(ndof2*(js-1)+1:ndof2*je) 632 enddo 633 do i=1,nmissing 634 ke=index(missing(i)) 635 item(ke)=missing(i) 636 A(ndof2*(ke-1)+1:ndof2*ke)=0.d0 637 enddo 638 deallocate(BTtKT%index) 639 deallocate(BTtKT%item) 640 deallocate(BTtKT%A) 641 BTtKT%index => index 642 BTtKT%item => item 643 BTtKT%A => A 644 BTtKT%nnz = index(BTtKT%nr) 645 deallocate(cnt) 646 endif 647 deallocate(missing) 648 649 ! place num 650 outer: do ilag=1,num_lagrange 651 i=(iwS(ilag)-1)/ndof+1 652 idof=mod(iwS(ilag)-1, ndof)+1 653 js=BTtKT%index(i-1)+1 654 je=BTtKT%index(i) 655 do j=js,je 656 jj=BTtKT%item(j) 657 if (jj==i) then 658 !write(0,*) ilag, i, idof 659 BTtKT%A((j-1)*ndof2+(idof-1)*ndof+idof)=num 660 cycle outer 661 endif 662 enddo 663 enddo outer 664 end subroutine place_num_on_diag 665 666 subroutine replace_hecmat(hecMAT, BTtKT) 667 implicit none 668 type (hecmwST_matrix), intent(inout) :: hecMAT 669 type (hecmwST_local_matrix), intent(in) :: BTtKT 670 integer :: nr, nc, ndof, ndof2, i, nl, nu, js, je, j, jj 671 integer :: ksl, ksu, k 672 673 nr=BTtKT%nr 674 nc=BTtKT%nc 675 ndof=hecMAT%NDOF 676 ndof2=ndof*ndof 677 678 ! free old hecMAT 679 if (associated(hecMAT%AL)) deallocate(hecMAT%AL) 680 if (associated(hecMAT%AU)) deallocate(hecMAT%AU) 681 if (associated(hecMAT%itemL)) deallocate(hecMAT%itemL) 682 if (associated(hecMAT%itemU)) deallocate(hecMAT%itemU) 683 hecMAT%indexL=0 684 hecMAT%indexU=0 685 686 ! count NPL, NPU 687 !$omp parallel default(none),private(i,nl,nu,js,je,j,jj), & 688 !$omp& shared(nr,BTtKT,hecMAT) 689 !$omp do 690 do i=1,nr 691 nl=0 692 nu=0 693 js=BTtKT%index(i-1)+1 694 je=BTtKT%index(i) 695 do j=js,je 696 jj=BTtKT%item(j) 697 if (jj < i) then 698 nl=nl+1 699 elseif (i < jj) then 700 nu=nu+1 701 else 702 ! diagonal 703 endif 704 enddo 705 hecMAT%indexL(i)=nl 706 hecMAT%indexU(i)=nu 707 enddo 708 !$omp end do 709 !$omp end parallel 710 711 hecMAT%indexL(0)=0 712 hecMAT%indexU(0)=0 713 do i=1,nc 714 hecMAT%indexL(i)=hecMAT%indexL(i-1)+hecMAT%indexL(i) 715 hecMAT%indexU(i)=hecMAT%indexU(i-1)+hecMAT%indexU(i) 716 enddo 717 hecMAT%NPL=hecMAT%indexL(nc) 718 hecMAT%NPU=hecMAT%indexU(nc) 719 720 ! allocate new hecMAT 721 allocate(hecMAT%itemL(hecMAT%NPL), hecMAT%itemU(hecMAT%NPU)) 722 allocate(hecMAT%AL(hecMAT%NPL*ndof2), hecMAT%AU(hecMAT%NPU*ndof2)) 723 hecMAT%itemL=0 724 hecMAT%itemU=0 725 hecMAT%D=0.d0 726 hecMAT%AL=0.d0 727 hecMAT%AU=0.d0 728 729 ! copy from BTtKT to hecMAT 730 !$omp parallel default(none),private(i,nl,nu,js,je,ksl,ksu,j,jj,k), & 731 !$omp& shared(nr,BTtKT,hecMAT,ndof2) 732 !$omp do 733 do i=1,nr 734 nl=0 735 nu=0 736 js=BTtKT%index(i-1)+1 737 je=BTtKT%index(i) 738 ksl=hecMAT%indexL(i-1)+1 739 ksu=hecMAT%indexU(i-1)+1 740 do j=js,je 741 jj=BTtKT%item(j) 742 if (jj < i) then 743 k=ksl+nl 744 hecMAT%itemL(k)=jj 745 hecMAT%AL((k-1)*ndof2+1:k*ndof2)=BTtKT%A((j-1)*ndof2+1:j*ndof2) 746 nl=nl+1 747 elseif (i < jj) then 748 k=ksu+nu 749 hecMAT%itemU(k)=jj 750 hecMAT%AU((k-1)*ndof2+1:k*ndof2)=BTtKT%A((j-1)*ndof2+1:j*ndof2) 751 nu=nu+1 752 else 753 hecMAT%D((i-1)*ndof2+1:i*ndof2)=BTtKT%A((j-1)*ndof2+1:j*ndof2) 754 endif 755 enddo 756 ! if (ksl+nl /= hecMAT%indexL(i)+1) stop 'ERROR: indexL' 757 ! if (ksu+nu /= hecMAT%indexU(i)+1) stop 'ERROR: indexU' 758 enddo 759 !$omp end do 760 !$omp end parallel 761 762 ! do i=1,hecMAT%NPL 763 ! if (hecMAT%itemL(i) <= 0) stop 'ERROR: negative itemL' 764 ! if (hecMAT%itemL(i) > nc) stop 'ERROR: too big itemL' 765 ! enddo 766 ! do i=1,hecMAT%NPU 767 ! if (hecMAT%itemU(i) <= 0) stop 'ERROR: negative itemU' 768 ! if (hecMAT%itemU(i) > nc) stop 'ERROR: too big itemU' 769 ! enddo 770 end subroutine replace_hecmat 771 772 subroutine make_new_hecmat(hecMAT, BTtKT, hecTKT) 773 implicit none 774 type(hecmwST_matrix), intent(in) :: hecMAT 775 type(hecmwST_local_matrix), intent(in) :: BTtKT 776 type(hecmwST_matrix), intent(inout) :: hecTKT 777 integer(kind=kint) :: nr, nc, ndof, ndof2 778 779 nr=BTtKT%nr 780 nc=BTtKT%nc 781 ndof=BTtKT%ndof 782 ndof2=ndof*ndof 783 784 !write(0,*) 'DEBUG: nr, nc =',nr,nc 785 786 ! if (nr /= nc) then 787 ! stop 'ERROR: nr /= nc' 788 ! endif 789 hecTKT%N =hecMAT%N 790 hecTKT%NP=nc 791 hecTKT%NDOF=ndof 792 793 if (associated(hecTKT%D)) deallocate(hecTKT%D) 794 allocate(hecTKT%D(nc*ndof2)) 795 796 if (associated(hecTKT%indexL)) deallocate(hecTKT%indexL) 797 if (associated(hecTKT%indexU)) deallocate(hecTKT%indexU) 798 allocate(hecTKT%indexL(0:nc)) 799 allocate(hecTKT%indexU(0:nc)) 800 801 hecTKT%Iarray=hecMAT%Iarray 802 hecTKT%Rarray=hecMAT%Rarray 803 804 call replace_hecmat(hecTKT, BTtKT) 805 end subroutine make_new_hecmat 806 807 subroutine hecmw_localmat_mulvec(BTmat, V, TV) 808 implicit none 809 type (hecmwST_local_matrix), intent(in) :: BTmat 810 real(kind=kreal), intent(in), target :: V(:) 811 real(kind=kreal), intent(out), target :: TV(:) 812 real(kind=kreal), pointer :: TVp(:), Tp(:), Vp(:) 813 integer :: nr, ndof, ndof2, i, js, je, j, jj, k, kl0, l 814 !!$ real(kind=kreal) :: vnorm 815 816 nr=BTmat%nr 817 ndof=BTmat%ndof 818 ndof2=ndof*ndof 819 820 TV=0.d0 821 822 !!$ vnorm=0.d0 823 !!$ do i=1,nr*ndof 824 !!$ vnorm=vnorm+V(i)**2 825 !!$ enddo 826 !!$ write(0,*) 'vnorm:', sqrt(vnorm) 827 828 !$omp parallel default(none),private(i,TVp,js,je,j,jj,Tp,Vp,k,kl0,l), & 829 !$omp& shared(nr,TV,ndof,BTmat,ndof2,V) 830 !$omp do 831 do i=1,nr 832 TVp=>TV((i-1)*ndof+1:i*ndof) 833 js=BTmat%index(i-1)+1 834 je=BTmat%index(i) 835 do j=js,je 836 jj=BTmat%item(j) 837 Tp=>BTmat%A((j-1)*ndof2+1:j*ndof2) 838 Vp=>V((jj-1)*ndof+1:jj*ndof) 839 do k=1,ndof 840 kl0=(k-1)*ndof 841 do l=1,ndof 842 TVp(k)=TVp(k)+Tp(kl0+l)*Vp(l) 843 enddo 844 enddo 845 enddo 846 enddo 847 !$omp end do 848 !$omp end parallel 849 end subroutine hecmw_localmat_mulvec 850 851 subroutine hecmw_trimatmul_TtKT_mpc(hecMESH, hecMAT, hecTKT) 852 implicit none 853 type (hecmwST_local_mesh), intent(inout) :: hecMESH 854 type (hecmwST_matrix), intent(in) :: hecMAT 855 type (hecmwST_matrix), intent(inout) :: hecTKT 856 type (hecmwST_local_matrix) :: BTmat, BTtmat 857 integer(kind=kint), allocatable :: iwS(:) 858 integer(kind=kint) :: ndof, n_mpc, i_mpc 859 integer(kind=kint) :: i, j, k, kk, ilag 860 real(kind=kreal) :: t0, t1 861 t0 = hecmw_wtime() 862 ndof=hecMAT%NDOF 863 n_mpc=0 864 OUTER: do i=1,hecMESH%mpc%n_mpc 865 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 866 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER 867 enddo 868 n_mpc=n_mpc+1 869 enddo OUTER 870 allocate(iwS(n_mpc)) 871 i_mpc=0 872 OUTER2: do i=1,hecMESH%mpc%n_mpc 873 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 874 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER2 875 enddo 876 i_mpc=i_mpc+1 877 k=hecMESH%mpc%mpc_index(i-1)+1 878 kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k) 879 iwS(i_mpc)=kk 880 enddo OUTER2 881 if (DEBUG >= 2) then 882 write(700+hecmw_comm_get_rank(),*) 'DEBUG: n_mpc, slaves',n_mpc,iwS(1:n_mpc) 883 endif 884 t1 = hecmw_wtime() 885 if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (1) : ",t1-t0 886 t0 = hecmw_wtime() 887 call make_BTmat_mpc(hecMESH, ndof, BTmat) 888 if (DEBUG >= 3) then 889 write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTmat(MPC)' 890 if (DEBUG == 3) then 891 call hecmw_localmat_write_ij(BTmat,700+hecmw_comm_get_rank()) 892 else 893 call hecmw_localmat_write(BTmat,700+hecmw_comm_get_rank()) 894 endif 895 endif 896 t1 = hecmw_wtime() 897 if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (2) : ",t1-t0 898 t0 = hecmw_wtime() 899 ! call make_BTtmat_mpc(hecMESH, ndof, BTtmat) 900 call hecmw_localmat_transpose(BTmat, BTtmat) 901 ! if (hecmw_localmat_equal(BTtmat, BTtmat2) == 0) then 902 ! write(0,*) 'ERROR: BTtmat2 is incorrect!!!' 903 ! else 904 ! write(0,*) 'DEBUG: BTtmat2 is correct' 905 ! endif 906 if (DEBUG >= 3) then 907 write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtmat(MPC)' 908 if (DEBUG == 3) then 909 call hecmw_localmat_write_ij(BTtmat,700+hecmw_comm_get_rank()) 910 else 911 call hecmw_localmat_write(BTtmat,700+hecmw_comm_get_rank()) 912 endif 913 endif 914 t1 = hecmw_wtime() 915 if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (3) : ",t1-t0 916 t0 = hecmw_wtime() 917 call hecmw_trimatmul_TtKT(hecMESH, BTtmat, hecMAT, BTmat, iwS, n_mpc, hecTKT) 918 t1 = hecmw_wtime() 919 if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (4) : ",t1-t0 920 t0 = hecmw_wtime() 921 922 if (associated(hecTKT%B)) deallocate(hecTKT%B) 923 if (associated(hecTKT%X)) deallocate(hecTKT%X) 924 allocate(hecTKT%B(ndof*hecTKT%NP)) 925 allocate(hecTKT%X(ndof*hecTKT%NP)) 926 do i=1, size(hecMAT%B) 927 hecTKT%B(i) = hecMAT%B(i) 928 enddo 929 do i=1, size(hecMAT%X) 930 hecTKT%X(i) = hecMAT%X(i) 931 enddo 932 do ilag=1,n_mpc 933 hecTKT%X(iwS(ilag)) = 0.d0 934 enddo 935 936 call hecmw_localmat_free(BTmat) 937 call hecmw_localmat_free(BTtmat) 938 ! call hecmw_localmat_free(BTtmat2) 939 deallocate(iwS) 940 t1 = hecmw_wtime() 941 if (TIMER >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (5) : ",t1-t0 942 end subroutine hecmw_trimatmul_TtKT_mpc 943 944 subroutine make_BTmat_mpc(hecMESH, ndof, BTmat) 945 implicit none 946 type (hecmwST_local_mesh), intent(in) :: hecMESH 947 integer(kind=kint), intent(in) :: ndof 948 type (hecmwST_local_matrix), intent(out) :: BTmat 949 type (hecmwST_local_matrix) :: Tmat 950 integer(kind=kint) :: n_mpc 951 integer(kind=kint) :: i,j,k,js,jj,kk 952 n_mpc=0 953 OUTER: do i=1,hecMESH%mpc%n_mpc 954 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 955 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER 956 enddo 957 n_mpc=n_mpc+1 958 enddo OUTER 959 Tmat%nr=hecMESH%n_node*ndof 960 Tmat%nc=Tmat%nr 961 Tmat%ndof=1 962 allocate(Tmat%index(0:Tmat%nr)) 963 ! count nonzero in each row 964 Tmat%index(1:Tmat%nr)=1 965 OUTER2: do i=1,hecMESH%mpc%n_mpc 966 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 967 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER2 968 enddo 969 k=hecMESH%mpc%mpc_index(i-1)+1 970 kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k) 971 Tmat%index(kk)=hecMESH%mpc%mpc_index(i)-hecMESH%mpc%mpc_index(i-1)-1 972 enddo OUTER2 973 ! index 974 Tmat%index(0)=0 975 do i=1,Tmat%nr 976 Tmat%index(i)=Tmat%index(i-1)+Tmat%index(i) 977 enddo 978 Tmat%nnz=Tmat%index(Tmat%nr) 979 allocate(Tmat%item(Tmat%nnz), Tmat%A(Tmat%nnz)) 980 ! diag 981 do i=1,Tmat%nr 982 js=Tmat%index(i-1)+1 983 Tmat%item(js)=i 984 Tmat%A(js)=1.d0 985 enddo 986 ! others 987 OUTER3: do i=1,hecMESH%mpc%n_mpc 988 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 989 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER3 990 enddo 991 k=hecMESH%mpc%mpc_index(i-1)+1 992 kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k) 993 js=Tmat%index(kk-1)+1 994 do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i) 995 jj = ndof * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j) 996 Tmat%item(js)=jj 997 Tmat%A(js)=-hecMESH%mpc%mpc_val(j) 998 js=js+1 999 enddo 1000 enddo OUTER3 1001 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Tmat(MPC)' 1002 !call hecmw_localmat_write(Tmat,700+hecmw_comm_get_rank()) 1003 call hecmw_localmat_blocking(Tmat, ndof, BTmat) 1004 call hecmw_localmat_free(Tmat) 1005 end subroutine make_BTmat_mpc 1006 1007 subroutine make_BTtmat_mpc(hecMESH, ndof, BTtmat) 1008 implicit none 1009 type (hecmwST_local_mesh), intent(in) :: hecMESH 1010 integer(kind=kint), intent(in) :: ndof 1011 type (hecmwST_local_matrix), intent(out) :: BTtmat 1012 type (hecmwST_local_matrix) :: Ttmat 1013 integer(kind=kint) :: n_mpc 1014 integer(kind=kint) :: i,j,k,js,je,jj,kk 1015 integer(kind=kint), allocatable :: iw(:) 1016 n_mpc=0 1017 OUTER: do i=1,hecMESH%mpc%n_mpc 1018 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 1019 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER 1020 enddo 1021 n_mpc=n_mpc+1 1022 enddo OUTER 1023 Ttmat%nr=hecMESH%n_node*ndof 1024 Ttmat%nc=Ttmat%nr 1025 Ttmat%ndof=1 1026 allocate(Ttmat%index(0:Ttmat%nr)) 1027 ! count nonzero in each row 1028 Ttmat%index(1:Ttmat%nr)=1 1029 OUTER2: do i=1,hecMESH%mpc%n_mpc 1030 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 1031 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER2 1032 enddo 1033 k=hecMESH%mpc%mpc_index(i-1)+1 1034 kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k) 1035 Ttmat%index(kk)=0 1036 do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i) 1037 jj = ndof * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j) 1038 Ttmat%index(jj)=Ttmat%index(jj)+1 1039 enddo 1040 enddo OUTER2 1041 ! index 1042 Ttmat%index(0)=0 1043 do i=1,Ttmat%nr 1044 Ttmat%index(i)=Ttmat%index(i-1)+Ttmat%index(i) 1045 enddo 1046 Ttmat%nnz=Ttmat%index(Ttmat%nr) 1047 allocate(Ttmat%item(Ttmat%nnz), Ttmat%A(Ttmat%nnz)) 1048 ! diag 1049 do i=1,Ttmat%nr 1050 js=Ttmat%index(i-1)+1 1051 je=Ttmat%index(i) 1052 if (js <= je) then 1053 Ttmat%item(js)=i 1054 Ttmat%A(js)=1.d0 1055 endif 1056 enddo 1057 ! others 1058 allocate(iw(Ttmat%nr)) 1059 iw(:)=1 1060 OUTER3: do i=1,hecMESH%mpc%n_mpc 1061 do j= hecMESH%mpc%mpc_index(i-1) + 1, hecMESH%mpc%mpc_index(i) 1062 if (hecMESH%mpc%mpc_dof(j) > ndof) cycle OUTER3 1063 enddo 1064 k=hecMESH%mpc%mpc_index(i-1)+1 1065 kk=ndof*(hecMESH%mpc%mpc_item(k)-1)+hecMESH%mpc%mpc_dof(k) 1066 do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i) 1067 jj = ndof * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j) 1068 js=Ttmat%index(jj-1)+1+iw(jj) 1069 Ttmat%item(js)=kk 1070 Ttmat%A(js)=-hecMESH%mpc%mpc_val(j) 1071 iw(jj)=iw(jj)+1 1072 enddo 1073 enddo OUTER3 1074 deallocate(iw) 1075 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Ttmat(MPC)' 1076 !call hecmw_localmat_write(Ttmat,700+hecmw_comm_get_rank()) 1077 call hecmw_localmat_blocking(Ttmat, ndof, BTtmat) 1078 call hecmw_localmat_free(Ttmat) 1079 end subroutine make_BTtmat_mpc 1080 1081 subroutine hecmw_localmat_transpose(Tmat, Ttmat) 1082 implicit none 1083 type (hecmwST_local_matrix), intent(in) :: Tmat 1084 type (hecmwST_local_matrix), intent(out) :: Ttmat 1085 integer(kind=kint), allocatable :: iw(:) 1086 integer(kind=kint) :: i, j, jj, ndof, ndof2, k, idof, jdof 1087 allocate(iw(Tmat%nc)) 1088 iw = 0 1089 do i = 1, Tmat%nr 1090 do j = Tmat%index(i-1)+1, Tmat%index(i) 1091 jj = Tmat%item(j) 1092 iw(jj) = iw(jj) + 1 1093 enddo 1094 enddo 1095 Ttmat%nr = Tmat%nc 1096 Ttmat%nc = Tmat%nr 1097 Ttmat%nnz = Tmat%nnz 1098 Ttmat%ndof = Tmat%ndof 1099 ndof = Tmat%ndof 1100 ndof2 = ndof * ndof 1101 allocate(Ttmat%index(0:Ttmat%nr)) 1102 allocate(Ttmat%item(Ttmat%nnz)) 1103 allocate(Ttmat%A(Ttmat%nnz*ndof2)) 1104 Ttmat%index(0) = 0 1105 do i = 1, Ttmat%nr 1106 Ttmat%index(i) = Ttmat%index(i-1) + iw(i) 1107 iw(i) = Ttmat%index(i-1) + 1 1108 enddo 1109 do i = 1, Tmat%nr 1110 do j = Tmat%index(i-1)+1, Tmat%index(i) 1111 jj = Tmat%item(j) 1112 k = iw(jj) 1113 Ttmat%item( k ) = i 1114 do idof = 1, ndof 1115 do jdof = 1, ndof 1116 Ttmat%A((k-1)*ndof2+(idof-1)*ndof+jdof) = & 1117 Tmat%A((j-1)*ndof2+(jdof-1)*ndof+idof) 1118 enddo 1119 enddo 1120 iw(jj) = k + 1 1121 enddo 1122 enddo 1123 end subroutine hecmw_localmat_transpose 1124 1125 function hecmw_localmat_equal(Tmat1, Tmat2) 1126 implicit none 1127 type (hecmwST_local_matrix), intent(in) :: Tmat1, Tmat2 1128 integer(kind=kint) :: hecmw_localmat_equal 1129 integer(kind=kint) :: i, j, k0, k, ndof, ndof2 1130 hecmw_localmat_equal = 0 1131 if (Tmat1%nr /= Tmat2%nr) return 1132 if (Tmat1%nc /= Tmat2%nc) return 1133 if (Tmat1%nnz /= Tmat2%nnz) return 1134 if (Tmat1%ndof /= Tmat2%ndof) return 1135 ndof = Tmat1%ndof 1136 ndof2 = ndof * ndof 1137 do i = 1, Tmat1%nr 1138 if (Tmat1%index(i) /= Tmat2%index(i)) return 1139 do j = Tmat1%index(i-1)+1, Tmat1%index(i) 1140 if (Tmat1%item(j) /= Tmat2%item(j)) return 1141 k0 = (j-1)*ndof2 1142 do k = 1, ndof2 1143 if (Tmat1%A(k0+k) /= Tmat2%A(k0+k)) return 1144 enddo 1145 enddo 1146 enddo 1147 hecmw_localmat_equal = 1 1148 end function hecmw_localmat_equal 1149 1150!!! 1151!!! Subroutines for parallel contact analysis with iterative linear solver 1152!!! 1153 1154 subroutine hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew) 1155 implicit none 1156 type (hecmwST_local_matrix), intent(inout) :: BTmat 1157 type (hecmwST_local_mesh), intent(in) :: hecMESH 1158 type (hecmwST_local_mesh), intent(inout) :: hecMESHnew 1159 integer(kind=kint) :: nn_int, np, ndof, ndof2, nr_ext, nnz_ext 1160 integer(kind=kint), allocatable :: exp_rows_index(:), exp_cols_index(:) 1161 integer(kind=kint), allocatable :: exp_rows_item(:,:), exp_cols_item(:,:) 1162 type (hecmwST_local_matrix), allocatable :: BT_ext(:) 1163 type (hecmwST_local_matrix) :: BT_int 1164 type (hecmwST_local_matrix) :: BTnew 1165 ! some checks 1166 if (DEBUG >= 1) write(0,*) 'DEBUG: nr,nc,nnz,ndof',BTmat%nr,BTmat%nc,BTmat%nnz,BTmat%ndof 1167 if (BTmat%nr /= hecMESH%n_node) stop 'ERROR: invalid size in hecmw_localmat_assemble' 1168 ! 1169 nn_int = hecMESH%nn_internal 1170 np = hecMESH%n_node 1171 ndof = BTmat%ndof 1172 ndof2 = ndof*ndof 1173 ! 1174 nr_ext = np - nn_int 1175 nnz_ext = BTmat%index(np) - BTmat%index(nn_int) 1176 ! 1177 call prepare_BT_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext) 1178 if (DEBUG >= 1) write(0,*) 'DEBUG: prepare_BT_ext done' 1179 ! 1180 call prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item) 1181 if (DEBUG >= 1) write(0,*) 'DEBUG: prepare_column info done' 1182 ! 1183 call send_BT_ext_and_recv_BT_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, & 1184 exp_cols_index, exp_cols_item, BT_int, hecMESHnew) 1185 if (DEBUG >= 1) write(0,*) 'DEBUG: send BT_ext and recv BT_int done' 1186 ! 1187 !write(0,*) 'BTmat%ndof,BT_int%ndof',BTmat%ndof,BT_int%ndof 1188 call hecmw_localmat_add(BTmat, BT_int, BTnew) 1189 if (DEBUG >= 1) write(0,*) 'DEBUG: localmat_add done' 1190 ! 1191 call hecmw_localmat_free(BTmat) 1192 call hecmw_localmat_free(BT_int) 1193 ! 1194 BTmat%nr = BTnew%nr 1195 BTmat%nc = BTnew%nc 1196 BTmat%nnz = BTnew%nnz 1197 BTmat%ndof = BTnew%ndof 1198 BTmat%index => BTnew%index 1199 BTmat%item => BTnew%item 1200 BTmat%A => BTnew%A 1201 ! 1202 ! hecMESH%n_node = hecMESHnew%n_node 1203 ! hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe 1204 ! deallocate(hecMESH%neighbor_pe) 1205 ! deallocate(hecMESH%import_index) 1206 ! deallocate(hecMESH%export_index) 1207 ! deallocate(hecMESH%import_item) 1208 ! deallocate(hecMESH%export_item) 1209 ! deallocate(hecMESH%node_ID) 1210 ! deallocate(hecMESH%global_node_ID) 1211 ! hecMESH%neighbor_pe => hecMESHnew%neighbor_pe 1212 ! hecMESH%import_index => hecMESHnew%import_index 1213 ! hecMESH%export_index => hecMESHnew%export_index 1214 ! hecMESH%import_item => hecMESHnew%import_item 1215 ! hecMESH%export_item => hecMESHnew%export_item 1216 ! hecMESH%node_ID => hecMESHnew%node_ID 1217 ! hecMESH%global_node_ID => hecMESHnew%global_node_ID 1218 ! 1219 if (DEBUG >= 1) write(0,*) 'DEBUG: update BTmat and hecMESH done' 1220 end subroutine hecmw_localmat_assemble 1221 1222 subroutine prepare_BT_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext) 1223 implicit none 1224 type (hecmwST_local_matrix), intent(in) :: BTmat 1225 type (hecmwST_local_mesh), intent(in) :: hecMESH 1226 integer(kind=kint), allocatable, intent(out) :: exp_rows_index(:) 1227 integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:) 1228 type (hecmwST_local_matrix), allocatable, intent(out) :: BT_ext(:) 1229 integer(kind=kint), allocatable :: incl_nz(:), exp_cols_per_row(:), exp_rows_per_rank(:) 1230 integer(kind=kint) :: nn_int 1231 nn_int = hecMESH%nn_internal 1232 ! 1233 call check_external_nz_blocks(BTmat, nn_int, incl_nz) 1234 ! 1235 call count_ext_rows_with_nz(BTmat, nn_int, incl_nz, exp_cols_per_row) 1236 ! 1237 call count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank) 1238 ! 1239 allocate(exp_rows_index(0:hecMESH%n_neighbor_pe)) 1240 call make_index(hecMESH%n_neighbor_pe, exp_rows_per_rank, exp_rows_index) 1241 !write(0,*) 'exp_rows_index',exp_rows_index(:) 1242 ! 1243 deallocate(exp_rows_per_rank) 1244 ! 1245 call make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item) 1246 ! 1247 deallocate(exp_cols_per_row) 1248 ! 1249 allocate(BT_ext(hecMESH%n_neighbor_pe)) 1250 call extract_BT_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext) 1251 ! 1252 deallocate(incl_nz) 1253 end subroutine prepare_BT_ext 1254 1255 subroutine check_external_nz_blocks(BTmat, nn_internal, incl_nz) 1256 implicit none 1257 type (hecmwST_local_matrix), intent(in) :: BTmat 1258 integer(kind=kint), intent(in) :: nn_internal 1259 integer(kind=kint), allocatable, intent(out) :: incl_nz(:) 1260 integer(kind=kint) :: ndof2, i0, nnz_ext, i, k, nnz_blk 1261 if (nn_internal > BTmat%nr) stop 'ERROR: invalid nn_internal' 1262 ndof2 = BTmat%ndof ** 2 1263 i0 = BTmat%index(nn_internal) 1264 nnz_ext = BTmat%index(BTmat%nr) - i0 1265 allocate(incl_nz(nnz_ext)) 1266 nnz_blk = 0 1267 do i = 1, nnz_ext 1268 incl_nz(i) = 0 1269 do k = 1, ndof2 1270 if (BTmat%A(ndof2*(i0+i-1)+k) /= 0.0d0) then 1271 incl_nz(i) = 1 1272 nnz_blk = nnz_blk + 1 1273 exit 1274 endif 1275 enddo 1276 enddo 1277 if (DEBUG >= 1) write(0,*) 'DEBUG: nnz_blk',nnz_blk 1278 end subroutine check_external_nz_blocks 1279 1280 subroutine count_ext_rows_with_nz(BTmat, nn_internal, incl_nz, exp_cols_per_row) 1281 implicit none 1282 type (hecmwST_local_matrix), intent(in) :: BTmat 1283 integer(kind=kint), intent(in) :: nn_internal 1284 integer(kind=kint), intent(in) :: incl_nz(:) 1285 integer(kind=kint), allocatable, intent(out) :: exp_cols_per_row(:) 1286 integer(kind=kint) :: nr_ext, nnz_int, i, irow, js, je, j, jcol 1287 nr_ext = BTmat%nr - nn_internal 1288 nnz_int = BTmat%index(nn_internal) 1289 allocate(exp_cols_per_row(nr_ext)) 1290 exp_cols_per_row(:) = 0 1291 do i = 1, nr_ext 1292 irow = nn_internal+i 1293 js = BTmat%index(irow-1)+1 1294 je = BTmat%index(irow) 1295 do j = js, je 1296 jcol = BTmat%item(j) 1297 if (incl_nz(j-nnz_int) == 1) exp_cols_per_row(i) = exp_cols_per_row(i) + 1 1298 enddo 1299 enddo 1300 !write(0,*) 'exp_cols_per_row',exp_cols_per_row(:) 1301 end subroutine count_ext_rows_with_nz 1302 1303 subroutine count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank) 1304 implicit none 1305 type (hecmwST_local_mesh), intent(in) :: hecMESH 1306 integer(kind=kint), intent(in) :: exp_cols_per_row(:) 1307 integer(kind=kint), allocatable, intent(out) :: exp_rows_per_rank(:) 1308 integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom 1309 allocate(exp_rows_per_rank(hecMESH%n_neighbor_pe)) 1310 exp_rows_per_rank(1:hecMESH%n_neighbor_pe) = 0 1311 nn_int = hecMESH%nn_internal 1312 np = hecMESH%n_node 1313 nr_ext = np - nn_int 1314 do i = 1, nr_ext 1315 if (exp_cols_per_row(i) > 0) then 1316 irow = nn_int + i 1317 exp_rank = hecMESH%node_ID(2*irow) 1318 call rank_to_idom(hecMESH, exp_rank, idom) 1319 exp_rows_per_rank(idom) = exp_rows_per_rank(idom) + 1 1320 endif 1321 enddo 1322 !write(0,*) 'exp_rows_per_rank',exp_rows_per_rank(:) 1323 end subroutine count_exp_rows_per_rank 1324 1325 subroutine rank_to_idom(hecMESH, rank, idom) 1326 implicit none 1327 type (hecmwST_local_mesh), intent(in) :: hecMESH 1328 integer(kind=kint), intent(in) :: rank 1329 integer(kind=kint), intent(out) :: idom 1330 integer(kind=kint) :: i 1331 do i = 1, hecMESH%n_neighbor_pe 1332 if (hecMESH%neighbor_pe(i) == rank) then 1333 idom = i 1334 return 1335 endif 1336 enddo 1337 stop 'ERROR: exp_rank not found in neighbor_pe' 1338 end subroutine rank_to_idom 1339 1340 subroutine make_index(len, cnt, index) 1341 implicit none 1342 integer(kind=kint), intent(in) :: len 1343 integer(kind=kint), intent(in) :: cnt(len) 1344 integer(kind=kint), intent(out) :: index(0:) 1345 integer(kind=kint) :: i 1346 ! write(0,*) 'make_index: len',len 1347 index(0) = 0 1348 do i = 1,len 1349 index(i) = index(i-1) + cnt(i) 1350 enddo 1351 end subroutine make_index 1352 1353 subroutine make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item) 1354 implicit none 1355 type (hecmwST_local_mesh), intent(in) :: hecMESH 1356 integer(kind=kint), intent(in) :: exp_cols_per_row(:) 1357 integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:) 1358 integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:) 1359 integer(kind=kint), allocatable :: cnt(:) 1360 integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom, idx 1361 allocate(exp_rows_item(2,exp_rows_index(hecMESH%n_neighbor_pe))) 1362 allocate(cnt(hecMESH%n_neighbor_pe)) 1363 cnt(:) = 0 1364 nn_int = hecMESH%nn_internal 1365 np = hecMESH%n_node 1366 nr_ext = np - nn_int 1367 do i = 1, nr_ext 1368 if (exp_cols_per_row(i) > 0) then 1369 irow = nn_int + i 1370 exp_rank = hecMESH%node_ID(2*irow) 1371 call rank_to_idom(hecMESH, exp_rank, idom) 1372 cnt(idom) = cnt(idom) + 1 1373 idx = exp_rows_index(idom-1) + cnt(idom) 1374 exp_rows_item(1,idx) = irow 1375 exp_rows_item(2,idx) = exp_cols_per_row(i) 1376 endif 1377 enddo 1378 !write(0,*) 'cnt',cnt(:) 1379 do idom = 1, hecMESH%n_neighbor_pe 1380 if (cnt(idom) /= exp_rows_index(idom)-exp_rows_index(idom-1)) stop 'ERROR: make exp_rows_item' 1381 enddo 1382 !write(0,*) 'exp_rows_item(1,:)',exp_rows_item(1,:) 1383 !write(0,*) 'exp_rows_item(2,:)',exp_rows_item(2,:) 1384 end subroutine make_exp_rows_item 1385 1386 subroutine extract_BT_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext) 1387 implicit none 1388 type (hecmwST_local_mesh), intent(in) :: hecMESH 1389 type (hecmwST_local_matrix), intent(in) :: BTmat 1390 integer(kind=kint), intent(in) :: incl_nz(:) 1391 integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:) 1392 integer(kind=kint), intent(in) :: exp_rows_item(:,:) 1393 type (hecmwST_local_matrix), allocatable, intent(out) :: BT_ext(:) 1394 integer(kind=kint) :: ndof, ndof2, nn_int, nnz_int, idom, j, idx, ncol, cnt, jrow, ks, ke, k, kcol 1395 allocate(BT_ext(hecMESH%n_neighbor_pe)) 1396 ndof = BTmat%ndof 1397 ndof2 = ndof * ndof 1398 nn_int = hecMESH%nn_internal 1399 nnz_int = BTmat%index(nn_int) 1400 do idom = 1, hecMESH%n_neighbor_pe 1401 BT_ext(idom)%nr = exp_rows_index(idom) - exp_rows_index(idom-1) 1402 BT_ext(idom)%nc = BTmat%nc 1403 BT_ext(idom)%nnz = 0 1404 BT_ext(idom)%ndof = ndof 1405 allocate(BT_ext(idom)%index(0:BT_ext(idom)%nr)) 1406 BT_ext(idom)%index(0) = 0 1407 do j = 1, BT_ext(idom)%nr 1408 idx = exp_rows_index(idom-1) + j 1409 ncol = exp_rows_item(2,idx) 1410 BT_ext(idom)%index(j) = BT_ext(idom)%index(j-1) + ncol 1411 enddo 1412 BT_ext(idom)%nnz = BT_ext(idom)%index(BT_ext(idom)%nr) 1413 if (DEBUG >= 1) write(0,*) 'DEBUG: idom,nr,nc,nnz,ndof', & 1414 idom,BT_ext(idom)%nr,BT_ext(idom)%nc,BT_ext(idom)%nnz,BT_ext(idom)%ndof 1415 allocate(BT_ext(idom)%item(BT_ext(idom)%nnz)) 1416 allocate(BT_ext(idom)%A(BT_ext(idom)%nnz * ndof2)) 1417 cnt = 0 1418 do j = 1, BT_ext(idom)%nr 1419 idx = exp_rows_index(idom-1) + j 1420 jrow = exp_rows_item(1,idx) 1421 if (jrow < 1 .or. BTmat%nr < jrow) stop 'ERROR: extract BT_ext: jrow' 1422 ks = BTmat%index(jrow-1)+1 1423 ke = BTmat%index(jrow) 1424 do k = ks, ke 1425 kcol = BTmat%item(k) 1426 if (incl_nz(k-nnz_int) == 0) cycle 1427 cnt = cnt + 1 1428 BT_ext(idom)%item(cnt) = kcol 1429 BT_ext(idom)%A(ndof2*(cnt-1)+1:ndof2*cnt) = BTmat%A(ndof2*(k-1)+1:ndof2*k) 1430 enddo 1431 if (cnt /= BT_ext(idom)%index(j)) stop 'ERROR: extract BT_ext' 1432 enddo 1433 ! if (DEBUG >= 3) write(100+hecMESH%my_rank,*) 'BT_ext(',idom,')' 1434 ! call hecmw_localmat_write(BT_ext(idom), 100+hecMESH%my_rank) 1435 enddo 1436 end subroutine extract_BT_ext 1437 1438 subroutine prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item) 1439 implicit none 1440 type (hecmwST_local_mesh), intent(in) :: hecMESH 1441 type (hecmwST_local_matrix), intent(in) :: BT_ext(:) 1442 integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:) 1443 integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:) 1444 ! 1445 call make_exp_cols_index(hecMESH%n_neighbor_pe, BT_ext, exp_cols_index) 1446 if (DEBUG >= 2) write(0,*) ' DEBUG2: make exp_cols_index done' 1447 if (DEBUG >= 3) write(0,*) ' DEBUG3: exp_cols_index', exp_cols_index(0:hecMESH%n_neighbor_pe) 1448 ! 1449 ! (col ID, rank, global ID) 1450 ! 1451 call make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item) 1452 if (DEBUG >= 2) write(0,*) ' DEBUG2: make exp_cols_item done' 1453 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: exp_cols_item', exp_cols_item(1:cNCOL_ITEM,1:exp_cols_index(hecMESH%n_neighbor_pe)) 1454 end subroutine prepare_column_info 1455 1456 subroutine make_exp_cols_index(nnb, BT_ext, exp_cols_index) 1457 implicit none 1458 integer(kind=kint), intent(in) :: nnb 1459 type (hecmwST_local_matrix), intent(in) :: BT_ext(:) 1460 integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:) 1461 integer(kind=kint) :: idom 1462 allocate(exp_cols_index(0:nnb)) 1463 exp_cols_index(0) = 0 1464 do idom = 1, nnb 1465 exp_cols_index(idom) = exp_cols_index(idom-1) + BT_ext(idom)%nnz 1466 enddo 1467 end subroutine make_exp_cols_index 1468 1469 subroutine make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item) 1470 implicit none 1471 type (hecmwST_local_mesh), intent(in) :: hecMESH 1472 type (hecmwST_local_matrix), intent(in) :: BT_ext(:) 1473 integer(kind=kint), allocatable, intent(in) :: exp_cols_index(:) 1474 integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:) 1475 integer(kind=kint) :: cnt, idom, j, jcol 1476 allocate(exp_cols_item(cNCOL_ITEM,exp_cols_index(hecMESH%n_neighbor_pe))) 1477 cnt = 0 1478 do idom = 1, hecMESH%n_neighbor_pe 1479 do j = 1, BT_ext(idom)%nnz 1480 cnt = cnt + 1 1481 jcol = BT_ext(idom)%item(j) 1482 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: idom,j,cnt,jcol,nn_internal,n_node',& 1483 ! idom,j,cnt,jcol,hecMESH%nn_internal,hecMESH%n_node 1484 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of exp_cols_item',size(exp_cols_item) 1485 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of node_ID',size(hecMESH%node_ID) 1486 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of global_node_ID',size(hecMESH%global_node_ID) 1487 exp_cols_item(cLID,cnt) = hecMESH%node_ID(2*jcol-1) 1488 exp_cols_item(cRANK,cnt) = hecMESH%node_ID(2*jcol) 1489 if (cNCOL_ITEM >= 3) exp_cols_item(cGID,cnt) = hecMESH%global_node_ID(jcol) 1490 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: lid,rank(,gid)',exp_cols_item(1:cNCOL_ITEM,cnt) 1491 enddo 1492 if (cnt /= exp_cols_index(idom)) stop 'ERROR: make exp_cols_item' 1493 enddo 1494 end subroutine make_exp_cols_item 1495 1496 subroutine send_BT_ext_and_recv_BT_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, & 1497 exp_cols_index, exp_cols_item, BT_int, hecMESHnew) 1498 implicit none 1499 type (hecmwST_local_mesh), intent(in) :: hecMESH 1500 integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:) 1501 integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:) 1502 type (hecmwST_local_matrix), allocatable, intent(inout) :: BT_ext(:) 1503 type (hecmwST_local_matrix), intent(out) :: BT_int 1504 type (hecmwST_local_mesh), intent(inout) :: hecMESHnew 1505 integer(kind=kint), allocatable :: imp_rows_index(:), imp_cols_index(:) 1506 integer(kind=kint), allocatable :: imp_rows_item(:,:), imp_cols_item(:,:) 1507 real(kind=kreal), allocatable :: imp_vals_item(:) 1508 integer(kind=kint), allocatable :: map(:), add_nodes(:,:) 1509 integer(kind=kint) :: ndof, ndof2, idom, n_add_node, i0 1510 if (hecMESH%n_neighbor_pe == 0) return 1511 ndof = BT_ext(1)%ndof 1512 ndof2 = ndof*ndof 1513 ! 1514 call convert_rowID_to_remote_localID(hecMESH, exp_rows_index(hecMESH%n_neighbor_pe), exp_rows_item) 1515 if (DEBUG >= 2) write(0,*) ' DEBUG2: convert rowID to remote localID done' 1516 ! 1517 call send_recv_BT_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index) 1518 if (DEBUG >= 2) write(0,*) ' DEBUG2: send recv BT_ext nr and nnz done' 1519 ! 1520 call send_recv_BT_ext_contents(hecMESH, BT_ext, & 1521 exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, & 1522 imp_rows_index, imp_cols_index, & 1523 imp_rows_item, imp_cols_item, imp_vals_item) 1524 if (DEBUG >= 2) write(0,*) ' DEBUG2: send recv BT_ext contents done' 1525 ! 1526 do idom = 1, hecMESH%n_neighbor_pe 1527 call hecmw_localmat_free(BT_ext(idom)) 1528 enddo 1529 deallocate(BT_ext) 1530 ! 1531 call allocate_BT_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int) 1532 if (DEBUG >= 2) write(0,*) ' DEBUG2: allocate BT_int done' 1533 ! 1534 ! call copy_mesh(hecMESH, hecMESHnew) 1535 ! if (DEBUG >= 2) write(0,*) ' DEBUG2: copy mesh done' 1536 ! 1537 call map_imported_cols(hecMESHnew, imp_cols_index(hecMESH%n_neighbor_pe), & 1538 imp_cols_item, n_add_node, add_nodes, map, i0) 1539 if (DEBUG >= 2) write(0,*) ' DEBUG2: map imported cols done' 1540 ! 1541 call update_comm_table(hecMESHnew, n_add_node, add_nodes, i0) 1542 if (DEBUG >= 2) write(0,*) ' DEBUG2: update comm_table done' 1543 ! 1544 BT_int%nc = hecMESHnew%n_node 1545 ! 1546 call copy_vals_to_BT_int(hecMESH%n_neighbor_pe, imp_rows_index, imp_cols_index, & 1547 imp_rows_item, map, ndof2, imp_vals_item, BT_int) 1548 if (DEBUG >= 2) write(0,*) ' DEBUG2: copy vals to BT_int done' 1549 ! 1550 deallocate(imp_rows_index) 1551 deallocate(imp_cols_index) 1552 deallocate(imp_rows_item) 1553 deallocate(imp_cols_item) 1554 deallocate(imp_vals_item) 1555 deallocate(map) 1556 ! 1557 call sort_and_uniq_rows(BT_int) 1558 if (DEBUG >= 2) write(0,*) ' DEBUG2: sort and uniq rows of BT_int done' 1559 end subroutine send_BT_ext_and_recv_BT_int 1560 1561 subroutine convert_rowID_to_remote_localID(hecMESH, len, exp_rows_item) 1562 implicit none 1563 type (hecmwST_local_mesh), intent(in) :: hecMESH 1564 integer(kind=kint), intent(in) :: len 1565 integer(kind=kint), intent(out) :: exp_rows_item(:,:) 1566 integer(kind=kint) :: i 1567 do i = 1, len 1568 exp_rows_item(1,i) = hecMESH%node_ID(2 * exp_rows_item(1,i) - 1) 1569 enddo 1570 end subroutine convert_rowID_to_remote_localID 1571 1572 subroutine send_recv_BT_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index) 1573 use m_hecmw_comm_f 1574 implicit none 1575 type (hecmwST_local_mesh), intent(in) :: hecMESH 1576 type (hecmwST_local_matrix), intent(in) :: BT_ext(:) 1577 integer(kind=kint), allocatable, intent(out) :: imp_rows_index(:), imp_cols_index(:) 1578 integer(kind=kint) :: nnb, idom, irank, tag, recvbuf(2) 1579 integer(kind=kint), allocatable :: sendbuf(:,:) 1580 integer(kind=kint), allocatable :: requests(:) 1581 integer(kind=kint), allocatable :: statuses(:,:) 1582 nnb = hecMESH%n_neighbor_pe 1583 allocate(imp_rows_index(0:nnb)) 1584 allocate(imp_cols_index(0:nnb)) 1585 allocate(requests(nnb)) 1586 allocate(statuses(HECMW_STATUS_SIZE, nnb)) 1587 allocate(sendbuf(2,nnb)) 1588 do idom = 1, nnb 1589 irank = hecMESH%neighbor_pe(idom) 1590 ! nr = exp_rows_per_rank(idom) 1591 sendbuf(1,idom) = BT_ext(idom)%nr 1592 sendbuf(2,idom) = BT_ext(idom)%nnz 1593 tag=2001 1594 call HECMW_ISEND_INT(sendbuf(1,idom), 2, irank, tag, hecMESH%MPI_COMM, & 1595 requests(idom)) 1596 enddo 1597 imp_rows_index(0) = 0 1598 imp_cols_index(0) = 0 1599 do idom = 1, nnb 1600 irank = hecMESH%neighbor_pe(idom) 1601 tag = 2001 1602 call HECMW_RECV_INT(recvbuf, 2, irank, tag, & 1603 hecMESH%MPI_COMM, statuses(:,1)) 1604 imp_rows_index(idom) = imp_rows_index(idom-1) + recvbuf(1) 1605 imp_cols_index(idom) = imp_cols_index(idom-1) + recvbuf(2) 1606 enddo 1607 call HECMW_Waitall(nnb, requests, statuses) 1608 deallocate(requests) 1609 deallocate(statuses) 1610 deallocate(sendbuf) 1611 end subroutine send_recv_BT_ext_nr_nnz 1612 1613 subroutine send_recv_BT_ext_contents(hecMESH, BT_ext, & 1614 exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, & 1615 imp_rows_index, imp_cols_index, & 1616 imp_rows_item, imp_cols_item, imp_vals_item) 1617 use m_hecmw_comm_f 1618 implicit none 1619 type (hecmwST_local_mesh), intent(in) :: hecMESH 1620 type (hecmwST_local_matrix), intent(in) :: BT_ext(:) 1621 integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:) 1622 integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:) 1623 integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:) 1624 integer(kind=kint), allocatable, intent(out) :: imp_rows_item(:,:), imp_cols_item(:,:) 1625 real(kind=kreal), allocatable, intent(out) :: imp_vals_item(:) 1626 integer(kind=kint) :: nnb, ndof2, n_send, idom, irank, tag, nr, nnz 1627 integer(kind=kint), allocatable :: requests(:) 1628 integer(kind=kint), allocatable :: statuses(:,:) 1629 nnb = hecMESH%n_neighbor_pe 1630 if (nnb < 1) return 1631 ndof2 = BT_ext(1)%ndof ** 2 1632 allocate(imp_rows_item(2,imp_rows_index(nnb))) 1633 allocate(imp_cols_item(cNCOL_ITEM,imp_cols_index(nnb))) 1634 allocate(imp_vals_item(ndof2*imp_cols_index(nnb))) 1635 allocate(requests(3*nnb)) 1636 allocate(statuses(HECMW_STATUS_SIZE, 3*nnb)) 1637 n_send = 0 1638 do idom = 1, nnb 1639 irank = hecMESH%neighbor_pe(idom) 1640 if (BT_ext(idom)%nr > 0) then 1641 n_send = n_send + 1 1642 tag = 2002 1643 call HECMW_ISEND_INT(exp_rows_item(1,exp_rows_index(idom-1)+1), & 1644 2*BT_ext(idom)%nr, irank, tag, hecMESH%MPI_COMM, & 1645 requests(n_send)) 1646 n_send = n_send + 1 1647 tag = 2003 1648 call HECMW_ISEND_INT(exp_cols_item(1,exp_cols_index(idom-1)+1), & 1649 cNCOL_ITEM*BT_ext(idom)%nnz, irank, tag, hecMESH%MPI_COMM, & 1650 requests(n_send)) 1651 n_send = n_send + 1 1652 tag = 2004 1653 call HECMW_ISEND_R(BT_ext(idom)%A, ndof2*BT_ext(idom)%nnz, irank, & 1654 tag, hecMESH%MPI_COMM, requests(n_send)) 1655 endif 1656 enddo 1657 do idom = 1, nnb 1658 irank = hecMESH%neighbor_pe(idom) 1659 nr = imp_rows_index(idom) - imp_rows_index(idom-1) 1660 nnz = imp_cols_index(idom) - imp_cols_index(idom-1) 1661 if (nr > 0) then 1662 tag = 2002 1663 call HECMW_RECV_INT(imp_rows_item(1,imp_rows_index(idom-1)+1), & 1664 2*nr, irank, tag, hecMESH%MPI_COMM, statuses(:,1)) 1665 tag = 2003 1666 call HECMW_RECV_INT(imp_cols_item(1,imp_cols_index(idom-1)+1), & 1667 cNCOL_ITEM*nnz, irank, tag, hecMESH%MPI_COMM, statuses(:,1)) 1668 tag = 2004 1669 call HECMW_RECV_R(imp_vals_item(ndof2*imp_cols_index(idom-1)+1), & 1670 ndof2*nnz, irank, tag, hecMESH%MPI_COMM, statuses(:,1)) 1671 endif 1672 enddo 1673 call HECMW_Waitall(n_send, requests, statuses) 1674 deallocate(exp_rows_index) 1675 deallocate(exp_rows_item) 1676 deallocate(exp_cols_index) 1677 deallocate(exp_cols_item) 1678 end subroutine send_recv_BT_ext_contents 1679 1680 subroutine allocate_BT_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int) 1681 implicit none 1682 type (hecmwST_local_mesh), intent(in) :: hecMESH 1683 integer(kind=kint), intent(in) :: ndof 1684 integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_rows_item(:,:) 1685 type (hecmwST_local_matrix), intent(out) :: BT_int 1686 integer(kind=kint), allocatable :: cnt(:) 1687 integer(kind=kint) :: idom, is, ie, i, irow, ncol, ndof2 1688 ndof2 = ndof*ndof 1689 BT_int%nr = hecMESH%nn_internal 1690 BT_int%nc = hecMESH%n_node 1691 BT_int%nnz = 0 1692 BT_int%ndof = ndof 1693 allocate(cnt(BT_int%nr)) 1694 cnt(:) = 0 1695 do idom = 1, hecMESH%n_neighbor_pe 1696 is = imp_rows_index(idom-1)+1 1697 ie = imp_rows_index(idom) 1698 do i = is, ie 1699 irow = imp_rows_item(1,i) 1700 ncol = imp_rows_item(2,i) 1701 if (irow < 1 .or. BT_int%nr < irow) stop 'ERROR: allocate BT_int' 1702 cnt(irow) = cnt(irow) + ncol !!! might include duplicate cols 1703 enddo 1704 enddo 1705 ! 1706 allocate(BT_int%index(0:BT_int%nr)) 1707 call make_index(BT_int%nr, cnt, BT_int%index) 1708 ! 1709 BT_int%nnz = BT_int%index(BT_int%nr) 1710 allocate(BT_int%item(BT_int%nnz)) 1711 allocate(BT_int%A(BT_int%nnz * ndof2)) 1712 BT_int%A(:) = 0.d0 1713 end subroutine allocate_BT_int 1714 1715 subroutine copy_mesh(src, dst) 1716 implicit none 1717 type (hecmwST_local_mesh), intent(in) :: src 1718 type (hecmwST_local_mesh), intent(out) :: dst 1719 dst%zero = src%zero 1720 dst%MPI_COMM = src%MPI_COMM 1721 dst%PETOT = src%PETOT 1722 dst%PEsmpTOT = src%PEsmpTOT 1723 dst%my_rank = src%my_rank 1724 dst%n_subdomain = src%n_subdomain 1725 dst%n_node = src%n_node 1726 dst%nn_internal = src%nn_internal 1727 dst%n_dof = src%n_dof 1728 dst%n_neighbor_pe = src%n_neighbor_pe 1729 allocate(dst%neighbor_pe(dst%n_neighbor_pe)) 1730 dst%neighbor_pe(:) = src%neighbor_pe(:) 1731 allocate(dst%import_index(0:dst%n_neighbor_pe)) 1732 allocate(dst%export_index(0:dst%n_neighbor_pe)) 1733 dst%import_index(:)= src%import_index(:) 1734 dst%export_index(:)= src%export_index(:) 1735 allocate(dst%import_item(dst%import_index(dst%n_neighbor_pe))) 1736 dst%import_item(:) = src%import_item(:) 1737 allocate(dst%export_item(dst%export_index(dst%n_neighbor_pe))) 1738 dst%export_item(:) = src%export_item(:) 1739 allocate(dst%node_ID(2*dst%n_node)) 1740 dst%node_ID(1:2*dst%n_node) = src%node_ID(1:2*src%n_node) 1741 allocate(dst%global_node_ID(dst%n_node)) 1742 dst%global_node_ID(1:dst%n_node) = src%global_node_ID(1:src%n_node) 1743 dst%mpc%n_mpc = 0 1744 dst%node => src%node 1745 end subroutine copy_mesh 1746 1747 subroutine map_imported_cols(hecMESHnew, ncols, cols, n_add_node, add_nodes, map, i0) 1748 implicit none 1749 type (hecmwST_local_mesh), intent(inout) :: hecMESHnew 1750 integer(kind=kint), intent(in) :: ncols 1751 integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols) 1752 integer(kind=kint), allocatable, intent(out) :: map(:) 1753 integer(kind=kint), intent(out) :: n_add_node 1754 integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:) 1755 integer(kind=kint), intent(out) :: i0 1756 allocate(map(ncols)) 1757 ! 1758 call map_present_nodes(hecMESHnew, ncols, cols, map, n_add_node) 1759 ! 1760 ! add nodes == unmapped nodes 1761 ! 1762 call extract_add_nodes(ncols, cols, map, n_add_node, add_nodes) 1763 ! 1764 call append_nodes(hecMESHnew, n_add_node, add_nodes, i0) 1765 ! 1766 call map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map) 1767 end subroutine map_imported_cols 1768 1769 subroutine map_present_nodes(hecMESH, ncols, cols, map, n_add_node) 1770 implicit none 1771 type (hecmwST_local_mesh), intent(in) :: hecMESH 1772 integer(kind=kint), intent(in) :: ncols 1773 integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols) 1774 integer(kind=kint), intent(out) :: map(ncols) 1775 integer(kind=kint), intent(out) :: n_add_node 1776 integer(kind=kint) :: i, j, lid, rank, llid, n_ext_node, idx 1777 integer(kind=kint), allocatable :: ext_node(:) 1778 type (hecmwST_pair_array) :: parray 1779 ! 1780 call hecmw_pair_array_init(parray, hecMESH%n_node - hecMESH%nn_internal) 1781 do i = hecMESH%nn_internal + 1, hecMESH%n_node 1782 call hecmw_pair_array_append(parray, i, hecMESH%node_ID(2*i-1), hecMESH%node_ID(2*i)) 1783 enddo 1784 call hecmw_pair_array_sort(parray) 1785 ! 1786 n_add_node = 0 1787 n_ext_node = 0 1788 allocate(ext_node(ncols)) 1789 !$omp parallel default(none), & 1790 !$omp& private(i,lid,rank,llid,idx,j), & 1791 !$omp& shared(ncols,hecMESH,cols,map,n_ext_node,ext_node,parray), & 1792 !$omp& reduction(+:n_add_node) 1793 !$omp do 1794 do i = 1, ncols 1795 lid = cols(cLID,i) 1796 rank = cols(cRANK,i) 1797 ! check rank 1798 if (rank == hecMESH%my_rank) then ! internal: set mapping 1799 map(i) = lid 1800 else ! external 1801 !$omp atomic capture 1802 n_ext_node = n_ext_node + 1 1803 idx = n_ext_node 1804 !$omp end atomic 1805 ext_node(idx) = i 1806 endif 1807 enddo 1808 !$omp end do 1809 !$omp do 1810 do j = 1, n_ext_node 1811 i = ext_node(j) 1812 lid = cols(cLID,i) 1813 rank = cols(cRANK,i) 1814 ! search node_ID in external nodes 1815 llid = hecmw_pair_array_find_id(parray, lid, rank) 1816 if (llid > 0) then ! found: set mapping 1817 map(i) = llid 1818 else ! not found 1819 map(i) = -1 1820 n_add_node = n_add_node + 1 1821 endif 1822 enddo 1823 !$omp end do 1824 !$omp end parallel 1825 deallocate(ext_node) 1826 ! 1827 call hecmw_pair_array_finalize(parray) 1828 end subroutine map_present_nodes 1829 1830 subroutine extract_add_nodes(ncols, cols, map, n_add_node, add_nodes) 1831 implicit none 1832 integer(kind=kint), intent(in) :: ncols 1833 integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols), map(ncols) 1834 integer(kind=kint), intent(inout) :: n_add_node 1835 integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:) 1836 integer(kind=kint) :: cnt, i 1837 allocate(add_nodes(cNCOL_ITEM,n_add_node)) 1838 cnt = 0 1839 do i = 1, ncols 1840 if (map(i) == -1) then 1841 cnt = cnt + 1 1842 add_nodes(1:cNCOL_ITEM,cnt) = cols(1:cNCOL_ITEM,i) 1843 endif 1844 enddo 1845 if (cnt /= n_add_node) stop 'ERROR: extract add_nodes' 1846 call sort_and_uniq_add_nodes(n_add_node, add_nodes) 1847 end subroutine extract_add_nodes 1848 1849 subroutine sort_and_uniq_add_nodes(n_add_node, add_nodes) 1850 implicit none 1851 integer(kind=kint), intent(inout) :: n_add_node 1852 integer(kind=kint), intent(inout) :: add_nodes(cNCOL_ITEM,n_add_node) 1853 integer(kind=kint) :: ndup 1854 call sort_add_nodes(add_nodes, 1, n_add_node) 1855 call uniq_add_nodes(add_nodes, n_add_node, ndup) 1856 n_add_node = n_add_node - ndup 1857 end subroutine sort_and_uniq_add_nodes 1858 1859 recursive subroutine sort_add_nodes(add_nodes, id1, id2) 1860 implicit none 1861 integer(kind=kint), intent(inout) :: add_nodes(:,:) 1862 integer(kind=kint), intent(in) :: id1, id2 1863 integer(kind=kint) :: center, left, right 1864 integer(kind=kint) :: pivot(cNCOL_ITEM), tmp(cNCOL_ITEM) 1865 if (id1 >= id2) return 1866 center = (id1 + id2) / 2 1867 pivot(1:cNCOL_ITEM) = add_nodes(1:cNCOL_ITEM,center) 1868 left = id1 1869 right = id2 1870 do 1871 do while ((add_nodes(cRANK,left) < pivot(cRANK)) .or. & 1872 (add_nodes(cRANK,left) == pivot(cRANK) .and. add_nodes(cLID,left) < pivot(cLID))) 1873 left = left + 1 1874 enddo 1875 do while ((pivot(cRANK) < add_nodes(cRANK,right)) .or. & 1876 (pivot(cRANK) == add_nodes(cRANK,right) .and. pivot(cLID) < add_nodes(cLID,right))) 1877 right = right - 1 1878 enddo 1879 if (left >= right) exit 1880 tmp(1:cNCOL_ITEM) = add_nodes(1:cNCOL_ITEM,left) 1881 add_nodes(1:cNCOL_ITEM,left) = add_nodes(1:cNCOL_ITEM,right) 1882 add_nodes(1:cNCOL_ITEM,right) = tmp(1:cNCOL_ITEM) 1883 left = left + 1 1884 right = right - 1 1885 enddo 1886 if (id1 < left-1) call sort_add_nodes(add_nodes, id1, left-1) 1887 if (right+1 < id2) call sort_add_nodes(add_nodes, right+1, id2) 1888 return 1889 end subroutine sort_add_nodes 1890 1891 subroutine uniq_add_nodes(add_nodes, len, ndup) 1892 implicit none 1893 integer(kind=kint), intent(inout) :: add_nodes(:,:) 1894 integer(kind=kint), intent(in) :: len 1895 integer(kind=kint), intent(out) :: ndup 1896 integer(kind=kint) :: i 1897 ndup = 0 1898 do i = 2,len 1899 if (add_nodes(cLID,i) == add_nodes(cLID,i-1-ndup) .and. & 1900 add_nodes(cRANK,i) == add_nodes(cRANK,i-1-ndup)) then 1901 ndup = ndup + 1 1902 else if (ndup > 0) then 1903 add_nodes(1:cNCOL_ITEM,i-ndup) = add_nodes(1:cNCOL_ITEM,i) 1904 endif 1905 enddo 1906 end subroutine uniq_add_nodes 1907 1908 subroutine search_add_nodes(n_add_node, add_nodes, rank, lid, idx) 1909 implicit none 1910 integer(kind=kint), intent(in) :: n_add_node 1911 integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node) 1912 integer(kind=kint), intent(in) :: rank 1913 integer(kind=kint), intent(in) :: lid 1914 integer(kind=kint), intent(out) :: idx 1915 integer(kind=kint) :: left, right, center 1916 left = 1 1917 right = n_add_node 1918 do while (left <= right) 1919 center = (left + right) / 2 1920 if ((rank == add_nodes(cRANK,center)) .and. (lid == add_nodes(cLID,center))) then 1921 idx = center 1922 return 1923 else if ((rank < add_nodes(cRANK,center)) .or. & 1924 (rank == add_nodes(cRANK,center) .and. lid < add_nodes(cLID,center))) then 1925 right = center - 1 1926 else if ((add_nodes(cRANK,center) < rank) .or. & 1927 (add_nodes(cRANK,center) == rank .and. add_nodes(cLID,center) < lid)) then 1928 left = center + 1 1929 endif 1930 end do 1931 idx = -1 1932 end subroutine search_add_nodes 1933 1934 subroutine append_nodes(hecMESHnew, n_add_node, add_nodes, i0) 1935 implicit none 1936 type (hecmwST_local_mesh), intent(inout) :: hecMESHnew 1937 integer(kind=kint), intent(in) :: n_add_node 1938 integer(kind=kint), intent(in) :: add_nodes(:,:) 1939 integer(kind=kint), intent(out) :: i0 1940 integer(kind=kint) :: n_node, i, ii 1941 integer(kind=kint), pointer :: node_ID(:), global_node_ID(:) 1942 i0 = hecMESHnew%n_node 1943 n_node = hecMESHnew%n_node + n_add_node 1944 allocate(node_ID(2*n_node)) 1945 allocate(global_node_ID(n_node)) 1946 do i = 1, hecMESHnew%n_node 1947 node_ID(2*i-1) = hecMESHnew%node_ID(2*i-1) 1948 node_ID(2*i ) = hecMESHnew%node_ID(2*i ) 1949 global_node_ID(i) = hecMESHnew%global_node_ID(i) 1950 enddo 1951 do i = 1, n_add_node 1952 ii = hecMESHnew%n_node + i 1953 node_ID(2*ii-1) = add_nodes(cLID,i) 1954 node_ID(2*ii ) = add_nodes(cRANK,i) 1955 if (cNCOL_ITEM >= 3) then 1956 global_node_ID(i) = add_nodes(cGID,i) 1957 else 1958 global_node_ID(i) = -1 1959 endif 1960 enddo 1961 deallocate(hecMESHnew%node_ID) 1962 deallocate(hecMESHnew%global_node_ID) 1963 hecMESHnew%n_node = n_node 1964 hecMESHnew%node_ID => node_ID 1965 hecMESHnew%global_node_ID => global_node_ID 1966 end subroutine append_nodes 1967 1968 subroutine map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map) 1969 implicit none 1970 integer(kind=kint), intent(in) :: ncols 1971 integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols) 1972 integer(kind=kint), intent(in) :: n_add_node 1973 integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node) 1974 integer(kind=kint), intent(in) :: i0 1975 integer(kind=kint), intent(inout) :: map(ncols) 1976 integer(kind=kint) :: i, j 1977 do i = 1, ncols 1978 if (map(i) > 0) cycle 1979 call search_add_nodes(n_add_node, add_nodes, cols(cRANK,i), cols(cLID,i), j) 1980 if (j == -1) stop 'ERROR: map_additional_nodes' 1981 map(i) = i0 + j 1982 enddo 1983 end subroutine map_additional_nodes 1984 1985 subroutine update_comm_table(hecMESHnew, n_add_node, add_nodes, i0) 1986 use m_hecmw_comm_f 1987 implicit none 1988 type (hecmwST_local_mesh), intent(inout) :: hecMESHnew 1989 integer(kind=kint), intent(in) :: n_add_node 1990 integer(kind=kint), allocatable, intent(inout) :: add_nodes(:,:) 1991 integer(kind=kint), intent(in) :: i0 1992 integer(kind=kint), allocatable :: n_add_imp(:), add_imp_index(:) 1993 integer(kind=kint), allocatable :: add_imp_item_remote(:), add_imp_item_local(:) 1994 integer(kind=kint), allocatable :: n_add_exp(:), add_exp_index(:), add_exp_item(:) 1995 integer(kind=kint), allocatable :: n_new_imp(:), n_new_exp(:) 1996 integer(kind=kint) :: npe, nnb, comm, new_nnb 1997 integer(kind=kint), pointer :: nbpe(:), new_nbpe(:) 1998 integer(kind=kint), pointer :: import_index(:), export_index(:), import_item(:), export_item(:) 1999 integer(kind=kint), pointer :: new_import_index(:), new_export_index(:) 2000 integer(kind=kint), pointer :: new_import_item(:), new_export_item(:) 2001 npe = hecMESHnew%PETOT 2002 nnb = hecMESHnew%n_neighbor_pe 2003 comm = hecMESHnew%MPI_COMM 2004 nbpe => hecMESHnew%neighbor_pe 2005 import_index => hecMESHnew%import_index 2006 export_index => hecMESHnew%export_index 2007 import_item => hecMESHnew%import_item 2008 export_item => hecMESHnew%export_item 2009 ! 2010 call count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp) 2011 if (DEBUG >= 3) write(0,*) ' DEBUG3: count add_imp per rank done' 2012 ! 2013 allocate(add_imp_index(0:npe)) 2014 call make_index(npe, n_add_imp, add_imp_index) 2015 if (DEBUG >= 3) write(0,*) ' DEBUG3: make add_imp_index done' 2016 ! 2017 call make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, & 2018 add_imp_item_remote, add_imp_item_local) 2019 if (DEBUG >= 3) write(0,*) ' DEBUG3: make add_imp_item done' 2020 ! 2021 deallocate(add_nodes) 2022 ! 2023 ! all_to_all n_add_imp -> n_add_exp 2024 ! 2025 allocate(n_add_exp(npe)) 2026 call HECMW_ALLTOALL_INT(n_add_imp, 1, n_add_exp, 1, comm) 2027 if (DEBUG >= 3) write(0,*) ' DEBUG3: alltoall n_add_imp to n_add_exp done' 2028 ! 2029 allocate(add_exp_index(0:npe)) 2030 call make_index(npe, n_add_exp, add_exp_index) 2031 if (DEBUG >= 3) write(0,*) ' DEBUG3: make add_exp_index done' 2032 ! 2033 call send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, & 2034 add_exp_index, add_exp_item, comm) 2035 if (DEBUG >= 3) write(0,*) ' DEBUG3: send recv add_imp/exp_item done' 2036 ! 2037 ! count new import 2038 ! 2039 call count_new_comm_nodes(npe, nnb, nbpe, import_index, n_add_imp, n_new_imp) 2040 if (DEBUG >= 3) write(0,*) ' DEBUG3: count new comm_nodes (import) done' 2041 ! 2042 ! count new export 2043 ! 2044 call count_new_comm_nodes(npe, nnb, nbpe, export_index, n_add_exp, n_new_exp) 2045 if (DEBUG >= 3) write(0,*) ' DEBUG3: count new comm_nodes (export) done' 2046 ! 2047 call update_neighbor_pe(npe, n_new_imp, n_new_exp, new_nnb, new_nbpe) 2048 if (DEBUG >= 3) write(0,*) ' DEBUG3: update neighbor_pe done' 2049 ! 2050 ! merge import table: import 2051 ! 2052 call merge_comm_table(npe, nnb, nbpe, import_index, import_item, & 2053 new_nnb, new_nbpe, add_imp_index, add_imp_item_local, n_add_imp, n_new_imp, & 2054 new_import_index, new_import_item) 2055 if (DEBUG >= 3) write(0,*) ' DEBUG3: merge comm_table (import) done' 2056 ! 2057 deallocate(n_add_imp) 2058 deallocate(add_imp_index) 2059 deallocate(add_imp_item_remote, add_imp_item_local) 2060 deallocate(n_new_imp) 2061 ! 2062 ! merge export table: export 2063 ! 2064 call merge_comm_table(npe, nnb, nbpe, export_index, export_item, & 2065 new_nnb, new_nbpe, add_exp_index, add_exp_item, n_add_exp, n_new_exp, & 2066 new_export_index, new_export_item) 2067 if (DEBUG >= 3) write(0,*) ' DEBUG3: merge comm_table (export) done' 2068 ! 2069 deallocate(n_add_exp) 2070 deallocate(add_exp_index) 2071 deallocate(add_exp_item) 2072 deallocate(n_new_exp) 2073 ! 2074 deallocate(nbpe) 2075 deallocate(import_index,import_item) 2076 deallocate(export_index,export_item) 2077 hecMESHnew%n_neighbor_pe = new_nnb 2078 hecMESHnew%neighbor_pe => new_nbpe 2079 hecMESHnew%import_index => new_import_index 2080 hecMESHnew%export_index => new_export_index 2081 hecMESHnew%import_item => new_import_item 2082 hecMESHnew%export_item => new_export_item 2083 end subroutine update_comm_table 2084 2085 subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp) 2086 implicit none 2087 integer(kind=kint), intent(in) :: n_add_node 2088 integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node) 2089 integer(kind=kint), intent(in) :: npe 2090 integer(kind=kint), allocatable, intent(out) :: n_add_imp(:) 2091 integer(kind=kint) :: i, rank 2092 allocate(n_add_imp(npe)) 2093 n_add_imp(:) = 0 2094 do i = 1, n_add_node 2095 rank = add_nodes(cRANK,i) 2096 n_add_imp(rank+1) = n_add_imp(rank+1) + 1 2097 enddo 2098 end subroutine count_add_imp_per_rank 2099 2100 subroutine make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, & 2101 add_imp_item_remote, add_imp_item_local) 2102 implicit none 2103 integer(kind=kint), intent(in) :: n_add_node 2104 integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node) 2105 integer(kind=kint), intent(in) :: npe, i0 2106 integer(kind=kint), allocatable, intent(in) :: add_imp_index(:) 2107 integer(kind=kint), allocatable, intent(out) :: add_imp_item_remote(:), add_imp_item_local(:) 2108 integer(kind=kint), allocatable :: cnt(:) 2109 integer(kind=kint) :: i, lid, rank, ipe 2110 allocate(add_imp_item_remote(add_imp_index(npe))) 2111 allocate(add_imp_item_local(add_imp_index(npe))) 2112 allocate(cnt(npe)) 2113 cnt(:) = 0 2114 do i = 1, n_add_node 2115 lid = add_nodes(cLID,i) 2116 rank = add_nodes(cRANK,i) 2117 ipe = rank + 1 2118 cnt(ipe) = cnt(ipe) + 1 2119 add_imp_item_remote(add_imp_index(ipe-1) + cnt(ipe)) = lid 2120 add_imp_item_local(add_imp_index(ipe-1) + cnt(ipe)) = i0 + i 2121 enddo 2122 deallocate(cnt) 2123 end subroutine make_add_imp_item 2124 2125 subroutine send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, & 2126 add_exp_index, add_exp_item, mpi_comm) 2127 use m_hecmw_comm_f 2128 implicit none 2129 integer(kind=kint), intent(in) :: npe 2130 integer(kind=kint), allocatable, intent(in) :: add_imp_index(:), add_imp_item_remote(:) 2131 integer(kind=kint), allocatable, intent(in) :: add_exp_index(:) 2132 integer(kind=kint), allocatable, intent(out) :: add_exp_item(:) 2133 integer(kind=kint), intent(in) :: mpi_comm 2134 integer(kind=kint) :: n_send, i, irank, is, ie, len, tag 2135 integer(kind=kint), allocatable :: requests(:) 2136 integer(kind=kint), allocatable :: statuses(:,:) 2137 allocate(add_exp_item(add_exp_index(npe))) 2138 allocate(requests(npe)) 2139 allocate(statuses(HECMW_STATUS_SIZE, npe)) 2140 n_send = 0 2141 do i = 1, npe 2142 irank = i-1 2143 is = add_imp_index(i-1)+1 2144 ie = add_imp_index(i) 2145 len = ie - is + 1 2146 if (len == 0) cycle 2147 tag = 4001 2148 n_send = n_send + 1 2149 call HECMW_ISEND_INT(add_imp_item_remote(is:ie), len, irank, tag, & 2150 mpi_comm, requests(n_send)) 2151 enddo 2152 ! 2153 do i = 1, npe 2154 irank = i-1 2155 is = add_exp_index(i-1)+1 2156 ie = add_exp_index(i) 2157 len = ie - is + 1 2158 if (len == 0) cycle 2159 tag = 4001 2160 call HECMW_RECV_INT(add_exp_item(is:ie), len, irank, tag, & 2161 mpi_comm, statuses(:,1)) 2162 enddo 2163 call HECMW_Waitall(n_send, requests, statuses) 2164 end subroutine send_recv_add_imp_exp_item 2165 2166 subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new) 2167 implicit none 2168 integer(kind=kint), intent(in) :: npe, org_nnb 2169 !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), n_add(npe) 2170 integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:) 2171 integer(kind=kint), intent(in) :: n_add(:) 2172 integer(kind=kint), allocatable, intent(out) :: n_new(:) 2173 integer(kind=kint) :: i, irank, n_org 2174 allocate(n_new(npe)) 2175 n_new(:) = n_add(:) 2176 do i = 1, org_nnb 2177 irank = org_nbpe(i) 2178 n_org = org_index(i) - org_index(i-1) 2179 n_new(irank+1) = n_new(irank+1) + n_org 2180 enddo 2181 end subroutine count_new_comm_nodes 2182 2183 subroutine update_neighbor_pe(npe, n_new_imp, n_new_exp, & 2184 new_nnb, new_nbpe) 2185 implicit none 2186 integer(kind=kint), intent(in) :: npe 2187 integer(kind=kint), intent(in) :: n_new_imp(npe), n_new_exp(npe) 2188 integer(kind=kint), intent(out) :: new_nnb 2189 integer(kind=kint), pointer, intent(out) :: new_nbpe(:) 2190 integer(kind=kint) :: i 2191 new_nnb = 0 2192 do i = 1, npe 2193 if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) new_nnb = new_nnb+1 2194 enddo 2195 allocate(new_nbpe(new_nnb)) 2196 new_nnb = 0 2197 do i = 1, npe 2198 if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) then 2199 new_nnb = new_nnb+1 2200 new_nbpe(new_nnb) = i-1 2201 endif 2202 enddo 2203 end subroutine update_neighbor_pe 2204 2205 subroutine merge_comm_table(npe, org_nnb, org_nbpe, org_index, org_item, & 2206 new_nnb, new_nbpe, add_index, add_item, n_add, n_new, new_index, new_item) 2207 implicit none 2208 integer(kind=kint), intent(in) :: npe, org_nnb 2209 !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), org_item(:) 2210 integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:), org_item(:) 2211 integer(kind=kint), intent(in) :: new_nnb 2212 !integer(kind=kint), intent(in) :: new_nbpe(new_nnb), add_index(0:npe), add_item(:) 2213 integer(kind=kint), pointer, intent(in) :: new_nbpe(:) 2214 integer(kind=kint), allocatable, intent(in) :: add_index(:), add_item(:) 2215 integer(kind=kint), intent(in) :: n_add(npe), n_new(npe) 2216 integer(kind=kint), pointer, intent(out) :: new_index(:), new_item(:) 2217 integer(kind=kint), allocatable :: cnt(:) 2218 integer(kind=kint) :: i, irank, j, jrank, i0, j0, len 2219 ! if (associated(new_index)) deallocate(new_index) 2220 ! if (associated(new_item)) deallocate(new_item) 2221 allocate(new_index(0:new_nnb)) 2222 new_index(0) = 0 2223 do i = 1, new_nnb 2224 irank = new_nbpe(i) 2225 new_index(i) = new_index(i-1) + n_new(irank+1) 2226 enddo 2227 allocate(new_item(new_index(new_nnb))) 2228 allocate(cnt(npe)) 2229 cnt(:) = 0 2230 j = 1 2231 jrank = new_nbpe(j) 2232 do i = 1, org_nnb 2233 if (org_index(i) - org_index(i-1) == 0) cycle 2234 irank = org_nbpe(i) 2235 do while (jrank < irank) 2236 j = j + 1 2237 if (j > new_nnb) exit 2238 jrank = new_nbpe(j) 2239 enddo 2240 if (jrank /= irank) stop 'ERROR: merging comm table: org into new' 2241 i0 = org_index(i-1) 2242 len = org_index(i) - i0 2243 j0 = new_index(j-1) 2244 new_item(j0+1:j0+len) = org_item(i0+1:i0+len) 2245 cnt(jrank+1) = len 2246 enddo 2247 j = 1 2248 jrank = new_nbpe(j) 2249 do i = 1, npe 2250 if (n_add(i) == 0) cycle 2251 irank = i-1 2252 do while (jrank < irank) 2253 j = j + 1 2254 jrank = new_nbpe(j) 2255 enddo 2256 if (jrank /= irank) stop 'ERROR: merging comm table: add into new' 2257 i0 = add_index(i-1) 2258 len = add_index(i) - i0 2259 j0 = new_index(j-1) + cnt(jrank+1) 2260 new_item(j0+1:j0+len) = add_item(i0+1:i0+len) 2261 cnt(jrank+1) = cnt(jrank+1) + len 2262 if (cnt(jrank+1) /= new_index(j)-new_index(j-1)) stop 'ERROR: merging comm table' 2263 enddo 2264 deallocate(cnt) 2265 end subroutine merge_comm_table 2266 2267 subroutine copy_vals_to_BT_int(nnb, imp_rows_index, imp_cols_index, & 2268 imp_rows_item, map, ndof2, imp_vals_item, BT_int) 2269 implicit none 2270 integer(kind=kint), intent(in) :: nnb 2271 integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:) 2272 integer(kind=kint), intent(in) :: imp_rows_item(:,:), map(:) 2273 integer(kind=kint), intent(in) :: ndof2 2274 real(kind=kreal), intent(in) :: imp_vals_item(:) 2275 type (hecmwST_local_matrix), intent(inout) :: BT_int 2276 integer(kind=kint), allocatable :: cnt(:) 2277 integer(kind=kint) :: idom, is, ie, ic0, i, irow, ncol, j0, j 2278 allocate(cnt(BT_int%nr)) 2279 cnt(:) = 0 2280 do idom = 1, nnb 2281 is = imp_rows_index(idom-1)+1 2282 ie = imp_rows_index(idom) 2283 ic0 = imp_cols_index(idom-1) 2284 do i = is, ie 2285 irow = imp_rows_item(1,i) 2286 ncol = imp_rows_item(2,i) 2287 if (irow < 1 .or. BT_int%nr < irow) stop 'ERROR: copy vals to BT_int: irow' 2288 j0 = BT_int%index(irow-1) + cnt(irow) 2289 do j = 1, ncol 2290 BT_int%item(j0+j) = map(ic0+j) 2291 BT_int%A(ndof2*(j0+j-1)+1:ndof2*(j0+j)) = imp_vals_item(ndof2*(ic0+j-1)+1:ndof2*(ic0+j)) 2292 enddo 2293 cnt(irow) = cnt(irow) + ncol 2294 ic0 = ic0 + ncol 2295 enddo 2296 if (ic0 /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_int: ic0' 2297 enddo 2298 deallocate(cnt) 2299 end subroutine copy_vals_to_BT_int 2300 2301 subroutine sort_and_uniq_rows(BTmat) 2302 use hecmw_array_util 2303 implicit none 2304 type (hecmwST_local_matrix), intent(inout) :: BTmat 2305 integer(kind=kint) :: nr, ndof, ndof2 2306 integer(kind=kint) :: irow, is, ie, is_new, ie_new, i, i_new 2307 integer(kind=kint) :: ndup, ndup_tot 2308 integer(kind=kint) :: js, je, js_new, je_new 2309 integer(kind=kint) :: new_nnz 2310 integer(kind=kint), allocatable :: cnt(:) 2311 integer(kind=kint), pointer :: sort_item(:), new_index(:), new_item(:) 2312 real(kind=kreal), pointer :: new_A(:) 2313 logical :: sorted 2314 real(kind=kreal) :: t0, t1 2315 t0 = hecmw_wtime() 2316 nr = BTmat%nr 2317 ! check if already sorted 2318 sorted = .true. 2319 OUTER: do irow = 1, nr 2320 is = BTmat%index(irow-1)+1 2321 ie = BTmat%index(irow) 2322 do i = is, ie-1 2323 if (BTmat%item(i) >= BTmat%item(i+1)) then 2324 sorted = .false. 2325 exit OUTER 2326 endif 2327 enddo 2328 end do OUTER 2329 t1 = hecmw_wtime() 2330 if (TIMER >= 4) write(0, '(A,f10.4,L2)') "####### sort_and_uniq_rows (1) : ",t1-t0,sorted 2331 t0 = hecmw_wtime() 2332 if (sorted) return 2333 ! perform sort 2334 ndof = BTmat%ndof 2335 ndof2 = ndof*ndof 2336 ! duplicate item array (sort_item) 2337 allocate(sort_item(BTmat%nnz)) 2338 do i = 1, BTmat%nnz 2339 sort_item(i) = BTmat%item(i) 2340 enddo 2341 ! sort and uniq item for each row 2342 allocate(cnt(nr)) 2343 ndup_tot = 0 2344 !$omp parallel do default(none), & 2345 !$omp& schedule(dynamic,1), & 2346 !$omp& private(irow,is,ie,ndup), & 2347 !$omp& shared(nr,BTmat,sort_item,cnt), & 2348 !$omp& reduction(+:ndup_tot) 2349 do irow = 1, nr 2350 is = BTmat%index(irow-1)+1 2351 ie = BTmat%index(irow) 2352 call hecmw_qsort_int_array(sort_item, is, ie) 2353 call hecmw_uniq_int_array(sort_item, is, ie, ndup) 2354 cnt(irow) = (ie-is+1) - ndup 2355 ndup_tot = ndup_tot + ndup 2356 enddo 2357 !$omp end parallel do 2358 t1 = hecmw_wtime() 2359 if (TIMER >= 4) write(0, '(A,f10.4,I5)') "####### sort_and_uniq_rows (2) : ",t1-t0,ndup_tot 2360 t0 = hecmw_wtime() 2361 ! make new index and item array (new_index, new_item) 2362 if (ndup_tot == 0) then 2363 new_index => BTmat%index 2364 new_nnz = BTmat%nnz 2365 new_item => sort_item 2366 else 2367 allocate(new_index(0:nr)) 2368 call make_index(nr, cnt, new_index) 2369 new_nnz = new_index(nr) 2370 allocate(new_item(new_nnz)) 2371 do irow = 1, nr 2372 is = BTmat%index(irow-1)+1 2373 ie = is+cnt(irow)-1 2374 is_new = new_index(irow-1)+1 2375 ie_new = is_new+cnt(irow)-1 2376 new_item(is_new:ie_new) = sort_item(is:ie) 2377 enddo 2378 deallocate(sort_item) 2379 endif 2380 deallocate(cnt) 2381 t1 = hecmw_wtime() 2382 if (TIMER >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (3) : ",t1-t0 2383 t0 = hecmw_wtime() 2384 ! allocate and clear value array (new_A) 2385 allocate(new_A(ndof2*new_nnz)) 2386 new_A(:) = 0.d0 2387 ! copy/add value from old A to new A 2388 !$omp parallel do default(none), & 2389 !$omp& schedule(dynamic,1), & 2390 !$omp& private(irow,is,ie,is_new,ie_new,i,i_new,js,je,js_new,je_new), & 2391 !$omp& shared(nr,BTmat,new_index,new_item,ndof2,new_A) 2392 do irow = 1, nr 2393 is = BTmat%index(irow-1)+1 2394 ie = BTmat%index(irow) 2395 is_new = new_index(irow-1)+1 2396 ie_new = new_index(irow) 2397 ! for each item in row 2398 do i = is, ie 2399 ! find place in new item 2400 call hecmw_bsearch_int_array(new_item, is_new, ie_new, BTmat%item(i), i_new) 2401 if (i_new == -1) stop 'ERROR: sort_and_uniq_rows' 2402 js = ndof2*(i-1)+1 2403 je = ndof2*i 2404 js_new = ndof2*(i_new-1)+1 2405 je_new = ndof2*i_new 2406 new_A(js_new:je_new) = new_A(js_new:je_new) + BTmat%A(js:je) 2407 enddo 2408 enddo 2409 !$omp end parallel do 2410 t1 = hecmw_wtime() 2411 if (TIMER >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (4) : ",t1-t0 2412 t0 = hecmw_wtime() 2413 ! deallocate/update nnz, index, item, A 2414 if (ndup_tot == 0) then 2415 deallocate(BTmat%item) 2416 BTmat%item => new_item 2417 deallocate(BTmat%A) 2418 BTmat%A => new_A 2419 else 2420 BTmat%nnz = new_nnz 2421 deallocate(BTmat%index) 2422 BTmat%index => new_index 2423 deallocate(BTmat%item) 2424 BTmat%item => new_item 2425 deallocate(BTmat%A) 2426 BTmat%A => new_A 2427 endif 2428 end subroutine sort_and_uniq_rows 2429 2430 subroutine hecmw_localmat_add(Amat, Bmat, Cmat) 2431 implicit none 2432 type (hecmwST_local_matrix), intent(in) :: Amat 2433 type (hecmwST_local_matrix), intent(in) :: Bmat 2434 type (hecmwST_local_matrix), intent(out) :: Cmat 2435 integer(kind=kint) :: ndof, ndof2, nr, nc, i, icnt, js, je, j, jcol, idx, i0, k 2436 integer(kind=kint), allocatable :: iw(:) 2437 if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof' 2438 ndof = Amat%ndof 2439 ndof2 = ndof*ndof 2440 nr = min(Amat%nr, Bmat%nr) 2441 nc = max(Amat%nc, Bmat%nc) 2442 Cmat%ndof = ndof 2443 Cmat%nr = nr 2444 Cmat%nc = nc 2445 Cmat%nnz = 0 2446 allocate(Cmat%index(0:nr)) 2447 Cmat%index(0) = 0 2448 allocate(iw(nc)) 2449 do i = 1, nr 2450 icnt = 0 2451 ! Amat 2452 js = Amat%index(i-1)+1 2453 je = Amat%index(i) 2454 do j = js, je 2455 jcol = Amat%item(j) 2456 icnt = icnt + 1 2457 iw(icnt) = jcol 2458 enddo 2459 ! Bmat 2460 js = Bmat%index(i-1)+1 2461 je = Bmat%index(i) 2462 lj1: do j = js, je 2463 jcol = Bmat%item(j) 2464 do k = 1, icnt 2465 if (iw(k) == jcol) cycle lj1 2466 enddo 2467 icnt = icnt + 1 2468 iw(icnt) = jcol 2469 enddo lj1 2470 Cmat%index(i) = Cmat%index(i-1) + icnt 2471 enddo 2472 Cmat%nnz = Cmat%index(nr) 2473 allocate(Cmat%item(Cmat%nnz)) 2474 allocate(Cmat%A(ndof2*Cmat%nnz)) 2475 do i = 1, nr 2476 i0 = Cmat%index(i-1) 2477 icnt = 0 2478 ! Amat 2479 js = Amat%index(i-1)+1 2480 je = Amat%index(i) 2481 do j = js, je 2482 jcol = Amat%item(j) 2483 icnt = icnt + 1 2484 idx = i0 + icnt 2485 Cmat%item(idx) = jcol 2486 Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j) 2487 enddo 2488 ! Bmat 2489 js = Bmat%index(i-1)+1 2490 je = Bmat%index(i) 2491 lj2: do j = js, je 2492 jcol = Bmat%item(j) 2493 do k = 1, icnt 2494 idx = i0 + k 2495 if (Cmat%item(idx) == jcol) then 2496 Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = & 2497 Cmat%A(ndof2*(idx-1)+1:ndof2*idx) + Bmat%A(ndof2*(j-1)+1:ndof2*j) 2498 cycle lj2 2499 endif 2500 enddo 2501 icnt = icnt + 1 2502 idx = i0 + icnt 2503 Cmat%item(idx) = jcol 2504 Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j) 2505 enddo lj2 2506 if (i0 + icnt /= Cmat%index(i)) stop 'ERROR: merge localmat' 2507 enddo 2508 call sort_and_uniq_rows(Cmat) 2509 end subroutine hecmw_localmat_add 2510 2511 ! subroutine hecmw_localmat_add(Amat, Bmat, Cmat) 2512 ! implicit none 2513 ! type (hecmwST_local_matrix), intent(in) :: Amat 2514 ! type (hecmwST_local_matrix), intent(in) :: Bmat 2515 ! type (hecmwST_local_matrix), intent(out) :: Cmat 2516 ! integer(kind=kint) :: ndof, ndof2, nr, nc, i, js, je, j, jcol, nnz_row, idx, ks, ke, k, kcol 2517 ! if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof' 2518 ! ndof = Amat%ndof 2519 ! ndof2 = ndof*ndof 2520 ! nr = min(Amat%nr, Bmat%nr) 2521 ! nc = max(Amat%nc, Bmat%nc) 2522 ! Cmat%ndof = ndof 2523 ! Cmat%nr = nr 2524 ! Cmat%nc = nc 2525 ! Cmat%nnz = Amat%index(nr) + Bmat%index(nr) 2526 ! allocate(Cmat%index(0:nr)) 2527 ! allocate(Cmat%item(Cmat%nnz)) 2528 ! allocate(Cmat%A(ndof2 * Cmat%nnz)) 2529 ! Cmat%index(0) = 0 2530 ! idx = 0 2531 ! do i = 1, nr 2532 ! ! Amat 2533 ! js = Amat%index(i-1)+1 2534 ! je = Amat%index(i) 2535 ! do j = js, je 2536 ! idx = idx + 1 2537 ! Cmat%item(idx) = Amat%item(j) 2538 ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j) 2539 ! enddo 2540 ! ! Bmat 2541 ! js = Bmat%index(i-1)+1 2542 ! je = Bmat%index(i) 2543 ! do j = js, je 2544 ! idx = idx + 1 2545 ! Cmat%item(idx) = Bmat%item(j) 2546 ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j) 2547 ! enddo 2548 ! Cmat%index(i) = idx 2549 ! enddo 2550 ! if (Cmat%index(nr) /= Cmat%nnz) stop 'ERROR: merge localmat' 2551 ! call sort_and_uniq_rows(Cmat) 2552 ! end subroutine hecmw_localmat_add 2553 2554 subroutine hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange) 2555 implicit none 2556 type (hecmwST_local_matrix), intent(inout) :: BKmat 2557 type (hecmwST_matrix), intent(in) :: hecMAT 2558 integer(kind=kint), optional, intent(in) :: num_lagrange 2559 integer(kind=kint) :: ndof, ndof2, i, idx, idx2, js, je, j, k 2560 integer(kind=kint), allocatable :: incl_nz(:), cnt(:) 2561 logical :: check_nonzero 2562 !check_nonzero = .false. 2563 check_nonzero = .true. !!! always checking nonzero seems to be faster 2564 ! 2565 ndof = hecMAT%NDOF 2566 ndof2 = ndof*ndof 2567 ! nr, nc, nnz 2568 BKmat%nr = hecMAT%NP 2569 BKmat%nc = hecMAT%NP 2570 BKmat%ndof = ndof 2571 ! 2572 if (present(num_lagrange)) then !!! TEMPORARY (DUE TO WRONG conMAT WHEN num_lagrange==0) !!! 2573 if (num_lagrange == 0) then 2574 BKmat%nnz = 0 2575 allocate(BKmat%index(0:BKmat%nr)) 2576 BKmat%index(:) = 0 2577 BKmat%item => null() 2578 BKmat%A => null() 2579 return 2580 endif 2581 check_nonzero = .true. 2582 endif 2583 ! 2584 if (check_nonzero) then 2585 allocate(incl_nz(hecMAT%NPL + hecMAT%NPU + hecMAT%NP)) 2586 allocate(cnt(BKmat%nr)) 2587 incl_nz(:) = 0 2588 !$omp parallel default(none), & 2589 !$omp& private(i,idx,js,je,j,k), & 2590 !$omp& shared(BKmat,hecMAT,cnt,ndof2,incl_nz) 2591 !$omp do 2592 do i = 1, BKmat%nr 2593 idx = hecMAT%indexL(i-1) + (i-1) + hecMAT%indexU(i-1) 2594 cnt(i) = 0 2595 ! lower 2596 js = hecMAT%indexL(i-1)+1 2597 je = hecMAT%indexL(i) 2598 do j = js, je 2599 idx = idx + 1 2600 do k = 1, ndof2 2601 if (hecMAT%AL(ndof2*(j-1)+k) /= 0.0d0) then 2602 incl_nz(idx) = 1 2603 cnt(i) = cnt(i) + 1 2604 exit 2605 endif 2606 enddo 2607 enddo 2608 ! diag 2609 idx = idx + 1 2610 do k = 1, ndof2 2611 if (hecMAT%D(ndof2*(i-1)+k) /= 0.0d0) then 2612 incl_nz(idx) = 1 2613 cnt(i) = cnt(i) + 1 2614 exit 2615 endif 2616 enddo 2617 ! upper 2618 js = hecMAT%indexU(i-1)+1 2619 je = hecMAT%indexU(i) 2620 do j = js, je 2621 idx = idx + 1 2622 do k = 1, ndof2 2623 if (hecMAT%AU(ndof2*(j-1)+k) /= 0.0d0) then 2624 incl_nz(idx) = 1 2625 cnt(i) = cnt(i) + 1 2626 exit 2627 endif 2628 enddo 2629 enddo 2630 if (idx /= hecMAT%indexL(i) + i + hecMAT%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: count' 2631 enddo 2632 !$omp end do 2633 !$omp end parallel 2634 ! index 2635 allocate(BKmat%index(0:BKmat%nr)) 2636 call make_index(BKmat%nr, cnt, BKmat%index) 2637 deallocate(cnt) 2638 BKmat%nnz = BKmat%index(BKmat%nr) 2639 ! item, A 2640 allocate(BKmat%item(BKmat%nnz)) 2641 allocate(BKmat%A(ndof2 * BKmat%nnz)) 2642 !$omp parallel default(none), & 2643 !$omp& private(i,idx,idx2,js,je,j), & 2644 !$omp& shared(BKmat,hecMAT,ndof2,incl_nz) 2645 !$omp do 2646 do i = 1, BKmat%nr 2647 idx = hecMAT%indexL(i-1) + (i-1) + hecMAT%indexU(i-1) 2648 idx2 = BKmat%index(i-1) 2649 ! lower 2650 js = hecMAT%indexL(i-1)+1 2651 je = hecMAT%indexL(i) 2652 do j = js, je 2653 idx = idx + 1 2654 if (incl_nz(idx) == 1) then 2655 idx2 = idx2 + 1 2656 BKmat%item(idx2) = hecMAT%itemL(j) 2657 BKmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecMAT%AL(ndof2*(j-1)+1:ndof2*j) 2658 endif 2659 enddo 2660 ! diag 2661 idx = idx + 1 2662 if (incl_nz(idx) == 1) then 2663 idx2 = idx2 + 1 2664 BKmat%item(idx2) = i 2665 BKmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecMAT%D(ndof2*(i-1)+1:ndof2*i) 2666 endif 2667 ! upper 2668 js = hecMAT%indexU(i-1)+1 2669 je = hecMAT%indexU(i) 2670 do j = js, je 2671 idx = idx + 1 2672 if (incl_nz(idx) == 1) then 2673 idx2 = idx2 + 1 2674 BKmat%item(idx2) = hecMAT%itemU(j) 2675 BKmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecMAT%AU(ndof2*(j-1)+1:ndof2*j) 2676 endif 2677 enddo 2678 if (idx /= hecMAT%indexL(i) + i + hecMAT%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy' 2679 if (idx2 /= BKmat%index(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: index' 2680 enddo 2681 !$omp end do 2682 !$omp end parallel 2683 deallocate(incl_nz) 2684 else 2685 BKmat%nnz = hecMAT%NPL + hecMAT%NP + hecMAT%NPU 2686 allocate(BKmat%index(0:BKmat%nr)) 2687 allocate(BKmat%item(BKmat%nnz)) 2688 allocate(BKmat%A(ndof2 * BKmat%nnz)) 2689 BKmat%index(0) = 0 2690 !$omp parallel do default(none), & 2691 !$omp& private(i,idx,js,je,j), & 2692 !$omp& shared(BKmat,hecMAT,ndof2) 2693 do i = 1, BKmat%nr 2694 idx = hecMAT%indexL(i-1) + (i-1) + hecMAT%indexU(i-1) 2695 ! lower 2696 js = hecMAT%indexL(i-1)+1 2697 je = hecMAT%indexL(i) 2698 do j = js, je 2699 idx = idx + 1 2700 BKmat%item(idx) = hecMAT%itemL(j) 2701 BKmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecMAT%AL(ndof2*(j-1)+1:ndof2*j) 2702 enddo 2703 ! diag 2704 idx = idx + 1 2705 BKmat%item(idx) = i 2706 BKmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecMAT%D(ndof2*(i-1)+1:ndof2*i) 2707 ! upper 2708 js = hecMAT%indexU(i-1)+1 2709 je = hecMAT%indexU(i) 2710 do j = js, je 2711 idx = idx + 1 2712 BKmat%item(idx) = hecMAT%itemU(j) 2713 BKmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecMAT%AU(ndof2*(j-1)+1:ndof2*j) 2714 enddo 2715 BKmat%index(i) = idx 2716 if (idx /= hecMAT%indexL(i) + i + hecMAT%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy' 2717 enddo 2718 !$omp end parallel do 2719 endif 2720 end subroutine hecmw_localmat_init_with_hecmat 2721 2722 subroutine hecmw_localmat_add_hecmat(BKmat, hecMAT) 2723 implicit none 2724 type (hecmwST_local_matrix), intent(inout) :: BKmat 2725 type (hecmwST_matrix), intent(in) :: hecMAT 2726 type (hecmwST_local_matrix) :: W1mat, W2mat 2727 !! Should Be Simple If Non-Zero Profile Is Kept !! 2728 call hecmw_localmat_init_with_hecmat(W1mat, hecMAT) 2729 if (DEBUG >= 3) then 2730 write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)' 2731 if (DEBUG == 3) then 2732 call hecmw_localmat_write_ij(W1mat, 700+hecmw_comm_get_rank()) 2733 else 2734 call hecmw_localmat_write(W1mat, 700+hecmw_comm_get_rank()) 2735 endif 2736 endif 2737 call hecmw_localmat_add(BKmat, W1mat, W2mat) 2738 call hecmw_localmat_free(BKmat) 2739 call hecmw_localmat_free(W1mat) 2740 BKmat%nr = W2mat%nr 2741 BKmat%nc = W2mat%nc 2742 BKmat%nnz = W2mat%nnz 2743 BKmat%ndof = W2mat%ndof 2744 BKmat%index => W2mat%index 2745 BKmat%item => W2mat%item 2746 BKmat%A => W2mat%A 2747 end subroutine hecmw_localmat_add_hecmat 2748 2749 subroutine hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat) 2750 implicit none 2751 type (hecmwST_local_matrix), intent(inout) :: BKmat 2752 type (hecmwST_local_matrix), intent(inout) :: BTmat 2753 type (hecmwST_local_mesh), intent(inout) :: hecMESH 2754 type (hecmwST_local_matrix), intent(out) :: BKTmat 2755 type (hecmwST_matrix_comm) :: hecCOMM 2756 type (hecmwST_local_mesh) :: hecMESHnew 2757 type (hecmwST_local_matrix), allocatable :: BT_exp(:) 2758 type (hecmwST_local_matrix) :: BT_imp, BT_all 2759 integer(kind=kint), allocatable :: exp_cols_index(:) 2760 integer(kind=kint), allocatable :: exp_cols_item(:,:) 2761 real(kind=kreal) :: t0, t1 2762 t0 = hecmw_wtime() 2763 ! 2764 call make_comm_table(BKmat, hecMESH, hecCOMM) 2765 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: make_comm_table done' 2766 t1 = hecmw_wtime() 2767 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (1) : ',t1-t0 2768 t0 = hecmw_wtime() 2769 ! 2770 if (BTmat%nr > hecMESH%nn_internal) then 2771 ! consider only internal part of BTmat 2772 if (DEBUG >= 1) write(0,'(A)') 'DEBUG: hecmw_localmat_multmat: ignore external part of BTmat' 2773 BTmat%nr = hecMESH%nn_internal 2774 BTmat%nnz = BTmat%index(BTmat%nr) 2775 endif 2776 ! 2777 call extract_BT_exp(BTmat, hecCOMM, BT_exp) 2778 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: extract_BT_exp done' 2779 t1 = hecmw_wtime() 2780 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (2) : ',t1-t0 2781 t0 = hecmw_wtime() 2782 ! 2783 call prepare_column_info(hecMESH, BT_exp, exp_cols_index, exp_cols_item) 2784 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: prepare column info done' 2785 t1 = hecmw_wtime() 2786 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (3) : ',t1-t0 2787 t0 = hecmw_wtime() 2788 ! 2789 call send_BT_exp_and_recv_BT_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew) 2790 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: send BT_exp and recv BT_imp done' 2791 t1 = hecmw_wtime() 2792 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (4) : ',t1-t0 2793 t0 = hecmw_wtime() 2794 call free_comm_table(hecCOMM) 2795 ! 2796 call concat_BTmat_and_BT_imp(BTmat, BT_imp, BT_all) 2797 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: concat BTmat and BT_imp into BT_all done' 2798 t1 = hecmw_wtime() 2799 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (5) : ',t1-t0 2800 t0 = hecmw_wtime() 2801 call hecmw_localmat_free(BT_imp) 2802 ! 2803 call multiply_mat_mat(BKmat, BT_all, BKTmat) 2804 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: multiply BKmat and BT_all into BKTmat done' 2805 t1 = hecmw_wtime() 2806 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (6) : ',t1-t0 2807 t0 = hecmw_wtime() 2808 call hecmw_localmat_free(BT_all) 2809 ! 2810 if (hecMESH%n_neighbor_pe > 0) then 2811 hecMESH%n_node = hecMESHnew%n_node 2812 hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe 2813 deallocate(hecMESH%neighbor_pe) 2814 deallocate(hecMESH%import_index) 2815 deallocate(hecMESH%export_index) 2816 deallocate(hecMESH%import_item) 2817 deallocate(hecMESH%export_item) 2818 deallocate(hecMESH%node_ID) 2819 deallocate(hecMESH%global_node_ID) 2820 hecMESH%neighbor_pe => hecMESHnew%neighbor_pe 2821 hecMESH%import_index => hecMESHnew%import_index 2822 hecMESH%export_index => hecMESHnew%export_index 2823 hecMESH%import_item => hecMESHnew%import_item 2824 hecMESH%export_item => hecMESHnew%export_item 2825 hecMESH%node_ID => hecMESHnew%node_ID 2826 hecMESH%global_node_ID => hecMESHnew%global_node_ID 2827 if (DEBUG >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: update hecMESH done' 2828 t1 = hecmw_wtime() 2829 if (TIMER >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (7) : ',t1-t0 2830 endif 2831 end subroutine hecmw_localmat_multmat 2832 2833 subroutine make_comm_table(BKmat, hecMESH, hecCOMM) 2834 use m_hecmw_comm_f 2835 implicit none 2836 type (hecmwST_local_matrix), intent(in) :: BKmat 2837 type (hecmwST_local_mesh), intent(in) :: hecMESH 2838 type (hecmwST_matrix_comm), intent(out) :: hecCOMM 2839 integer(kind=kint) :: nn_int, nn_ext, nnb, i, icol, irank, idom, idx, n_send, tag, js, je, len 2840 integer(kind=kint), allocatable :: is_nz_col(:), imp_cnt(:), exp_cnt(:), import_item_remote(:) 2841 integer(kind=kint), allocatable :: requests(:), statuses(:,:) 2842 hecCOMM%zero = hecMESH%zero 2843 hecCOMM%HECMW_COMM = hecMESH%MPI_COMM 2844 hecCOMM%PETOT = hecMESH%PETOT 2845 hecCOMM%PEsmpTOT = hecMESH%PEsmpTOT 2846 hecCOMM%my_rank = hecMESH%my_rank 2847 hecCOMM%errnof = hecMESH%errnof 2848 hecCOMM%n_subdomain = hecMESH%n_subdomain 2849 hecCOMM%n_neighbor_pe = hecMESH%n_neighbor_pe 2850 allocate(hecCOMM%neighbor_pe(hecCOMM%n_neighbor_pe)) 2851 hecCOMM%neighbor_pe(:) = hecMESH%neighbor_pe(:) 2852 ! 2853 nn_int = hecMESH%nn_internal 2854 nn_ext = hecMESH%n_node - hecMESH%nn_internal 2855 nnb = hecCOMM%n_neighbor_pe 2856 ! 2857 ! check_external_nz_cols (by profile (not number)) 2858 allocate(is_nz_col(nn_ext)) 2859 is_nz_col(:) = 0 2860 do i = 1, BKmat%index(nn_int) 2861 icol = BKmat%item(i) 2862 if (icol > nn_int) is_nz_col(icol - nn_int) = 1 2863 enddo 2864 ! 2865 ! count_nz_cols_per_rank 2866 allocate(imp_cnt(nnb)) 2867 imp_cnt(:) = 0 2868 do i = 1, nn_ext 2869 if (is_nz_col(i) == 1) then 2870 irank = hecMESH%node_ID(2*(nn_int+i)) 2871 call rank_to_idom(hecMESH, irank, idom) 2872 imp_cnt(idom) = imp_cnt(idom) + 1 2873 endif 2874 enddo 2875 if (DEBUG >= 3) write(0,*) ' DEBUG3: imp_cnt',imp_cnt(:) 2876 ! 2877 ! make_index 2878 allocate(hecCOMM%import_index(0:nnb)) 2879 call make_index(nnb, imp_cnt, hecCOMM%import_index) 2880 if (DEBUG >= 3) write(0,*) ' DEBUG3: import_index',hecCOMM%import_index(:) 2881 ! 2882 ! fill item 2883 allocate(hecCOMM%import_item(hecCOMM%import_index(nnb))) 2884 imp_cnt(:) = 0 2885 do i = 1, nn_ext 2886 if (is_nz_col(i) == 1) then 2887 irank = hecMESH%node_ID(2*(nn_int+i)) 2888 call rank_to_idom(hecMESH, irank, idom) 2889 imp_cnt(idom) = imp_cnt(idom) + 1 2890 idx = hecCOMM%import_index(idom-1)+imp_cnt(idom) 2891 hecCOMM%import_item(idx) = nn_int+i 2892 endif 2893 enddo 2894 if (DEBUG >= 3) write(0,*) ' DEBUG3: import_item',hecCOMM%import_item(:) 2895 ! 2896 allocate(import_item_remote(hecCOMM%import_index(nnb))) 2897 do i = 1, hecCOMM%import_index(nnb) 2898 import_item_remote(i) = hecMESH%node_ID(2*hecCOMM%import_item(i)-1) 2899 enddo 2900 if (DEBUG >= 3) write(0,*) ' DEBUG3: import_item_remote',import_item_remote(:) 2901 ! 2902 allocate(requests(2*nnb)) 2903 allocate(statuses(HECMW_STATUS_SIZE, 2*nnb)) 2904 ! 2905 ! send/recv 2906 n_send = 0 2907 do idom = 1, nnb 2908 irank = hecCOMM%neighbor_pe(idom) 2909 n_send = n_send + 1 2910 tag = 6001 2911 call HECMW_ISEND_INT(imp_cnt(idom), 1, irank, tag, hecCOMM%HECMW_COMM, requests(n_send)) 2912 if (imp_cnt(idom) > 0) then 2913 js = hecCOMM%import_index(idom-1)+1 2914 je = hecCOMM%import_index(idom) 2915 len = je-js+1 2916 n_send = n_send + 1 2917 tag = 6002 2918 call HECMW_ISEND_INT(import_item_remote(js:je), len, irank, tag, & 2919 hecCOMM%HECMW_COMM, requests(n_send)) 2920 endif 2921 enddo 2922 ! 2923 ! index 2924 allocate(exp_cnt(nnb)) 2925 do idom = 1, nnb 2926 irank = hecCOMM%neighbor_pe(idom) 2927 tag = 6001 2928 call HECMW_RECV_INT(exp_cnt(idom), 1, irank, tag, hecCOMM%HECMW_COMM, statuses(:,1)) 2929 enddo 2930 allocate(hecCOMM%export_index(0:nnb)) 2931 call make_index(nnb, exp_cnt, hecCOMM%export_index) 2932 if (DEBUG >= 3) write(0,*) ' DEBUG3: export_index',hecCOMM%export_index(:) 2933 ! 2934 ! item 2935 allocate(hecCOMM%export_item(hecCOMM%export_index(nnb))) 2936 do idom = 1, nnb 2937 if (exp_cnt(idom) <= 0) cycle 2938 irank = hecCOMM%neighbor_pe(idom) 2939 js = hecCOMM%export_index(idom-1)+1 2940 je = hecCOMM%export_index(idom) 2941 len = je-js+1 2942 tag = 6002 2943 call HECMW_RECV_INT(hecCOMM%export_item(js:je), len, irank, tag, & 2944 hecCOMM%HECMW_COMM, statuses(:,1)) 2945 enddo 2946 if (DEBUG >= 3) write(0,*) ' DEBUG3: export_item',hecCOMM%export_item(:) 2947 call HECMW_Waitall(n_send, requests, statuses) 2948 ! 2949 deallocate(imp_cnt) 2950 deallocate(exp_cnt) 2951 deallocate(import_item_remote) 2952 end subroutine make_comm_table 2953 2954 subroutine free_comm_table(hecCOMM) 2955 implicit none 2956 type (hecmwST_matrix_comm), intent(inout) :: hecCOMM 2957 deallocate(hecCOMM%neighbor_pe) 2958 deallocate(hecCOMM%import_index) 2959 deallocate(hecCOMM%import_item) 2960 deallocate(hecCOMM%export_index) 2961 deallocate(hecCOMM%export_item) 2962 end subroutine free_comm_table 2963 2964 subroutine extract_BT_exp(BTmat, hecCOMM, BT_exp) 2965 implicit none 2966 type (hecmwST_local_matrix), intent(in) :: BTmat 2967 type (hecmwST_matrix_comm), intent(in) :: hecCOMM 2968 type (hecmwST_local_matrix), allocatable, intent(out) :: BT_exp(:) 2969 integer(kind=kint) :: ndof, ndof2, idom, idx_0, idx_n, j, jrow, nnz_row, idx, ks, ke, k 2970 if (hecCOMM%n_neighbor_pe == 0) return 2971 allocate(BT_exp(hecCOMM%n_neighbor_pe)) 2972 ndof = BTmat%ndof 2973 ndof2 = ndof * ndof 2974 do idom = 1, hecCOMM%n_neighbor_pe 2975 idx_0 = hecCOMM%export_index(idom-1) 2976 idx_n = hecCOMM%export_index(idom) 2977 BT_exp(idom)%nr = idx_n - idx_0 2978 BT_exp(idom)%nc = BTmat%nc 2979 BT_exp(idom)%nnz = 0 2980 BT_exp(idom)%ndof = ndof 2981 allocate(BT_exp(idom)%index(0:BT_exp(idom)%nr)) 2982 BT_exp(idom)%index(0) = 0 2983 do j = 1, BT_exp(idom)%nr 2984 jrow = hecCOMM%export_item(idx_0 + j) 2985 nnz_row = BTmat%index(jrow) - BTmat%index(jrow-1) 2986 BT_exp(idom)%index(j) = BT_exp(idom)%index(j-1) + nnz_row 2987 enddo 2988 BT_exp(idom)%nnz = BT_exp(idom)%index(BT_exp(idom)%nr) 2989 allocate(BT_exp(idom)%item(BT_exp(idom)%nnz)) 2990 allocate(BT_exp(idom)%A(ndof2 * BT_exp(idom)%nnz)) 2991 idx = 0 2992 do j = 1, BT_exp(idom)%nr 2993 jrow = hecCOMM%export_item(idx_0 + j) 2994 ks = BTmat%index(jrow-1) + 1 2995 ke = BTmat%index(jrow) 2996 do k = ks, ke 2997 idx = idx + 1 2998 BT_exp(idom)%item(idx) = BTmat%item(k) 2999 BT_exp(idom)%A(ndof2*(idx-1)+1:ndof2*idx) = BTmat%A(ndof2*(k-1)+1:ndof2*k) 3000 enddo 3001 if (idx /= BT_exp(idom)%index(j)) stop 'ERROR: extract BT_exp' 3002 enddo 3003 enddo 3004 end subroutine extract_BT_exp 3005 3006 subroutine send_BT_exp_and_recv_BT_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew) 3007 use m_hecmw_comm_f 3008 implicit none 3009 type (hecmwST_local_mesh), intent(in) :: hecMESH 3010 type (hecmwST_matrix_comm), intent(in) :: hecCOMM 3011 type (hecmwST_local_matrix), allocatable, intent(inout) :: BT_exp(:) 3012 integer(kind=kint), allocatable, intent(inout) :: exp_cols_index(:) 3013 integer(kind=kint), allocatable, intent(inout) :: exp_cols_item(:,:) 3014 type (hecmwST_local_matrix), intent(out) :: BT_imp 3015 type (hecmwST_local_mesh), intent(inout) :: hecMESHnew 3016 integer(kind=kint), allocatable :: nnz_imp(:), cnt(:), index_imp(:) 3017 integer(kind=kint), allocatable :: imp_cols_index(:) 3018 integer(kind=kint), allocatable :: imp_cols_item(:,:) 3019 real(kind=kreal), allocatable :: imp_vals_item(:) 3020 integer(kind=kint) :: nnb, ndof, ndof2, idom, irank, nr, n_send, tag, idx_0, idx_n, j, jj, nnz 3021 integer(kind=kint), allocatable :: requests(:) 3022 integer(kind=kint), allocatable :: statuses(:,:) 3023 integer(kind=kint), allocatable :: map(:), add_nodes(:,:) 3024 integer(kind=kint) :: n_add_node, i0 3025 nnb = hecCOMM%n_neighbor_pe 3026 if (nnb == 0) then 3027 BT_imp%nr = 0 3028 BT_imp%nc = 0 3029 BT_imp%nnz = 0 3030 BT_imp%ndof = 0 3031 allocate(BT_imp%index(0:0)) 3032 BT_imp%index(0) = 0 3033 return 3034 endif 3035 ndof = BT_exp(1)%ndof 3036 ndof2 = ndof*ndof 3037 allocate(requests(nnb*3)) 3038 allocate(statuses(HECMW_STATUS_SIZE, nnb*3)) 3039 n_send = 0 3040 do idom = 1, nnb 3041 irank = hecCOMM%neighbor_pe(idom) 3042 nr = BT_exp(idom)%nr 3043 if (nr == 0) cycle 3044 n_send = n_send + 1 3045 tag = 3001 3046 call HECMW_ISEND_INT(BT_exp(idom)%index(0:BT_exp(idom)%nr), BT_exp(idom)%nr + 1, & 3047 irank, tag, hecCOMM%HECMW_COMM, requests(n_send)) 3048 if (BT_exp(idom)%nnz == 0) cycle 3049 n_send = n_send + 1 3050 tag = 3002 3051 call HECMW_ISEND_INT(exp_cols_item(1,exp_cols_index(idom-1)+1), & 3052 cNCOL_ITEM * BT_exp(idom)%nnz, irank, tag, hecCOMM%HECMW_COMM, requests(n_send)) 3053 n_send = n_send + 1 3054 tag = 3003 3055 call HECMW_ISEND_R(BT_exp(idom)%A, ndof2 * BT_exp(idom)%nnz, & 3056 irank, tag, hecCOMM%HECMW_COMM, requests(n_send)) 3057 enddo 3058 ! 3059 ! BT_imp%nr = hecCOMM%import_index(nnb) 3060 BT_imp%nr = hecMESH%n_node - hecMESH%nn_internal 3061 BT_imp%nc = 0 !!! TEMPORARY 3062 BT_imp%nnz = 0 3063 BT_imp%ndof = ndof 3064 ! 3065 allocate(nnz_imp(nnb)) 3066 allocate(cnt(BT_imp%nr)) 3067 ! 3068 cnt(:) = 0 3069 do idom = 1, nnb 3070 irank = hecCOMM%neighbor_pe(idom) 3071 idx_0 = hecCOMM%import_index(idom-1) 3072 idx_n = hecCOMM%import_index(idom) 3073 nr = idx_n - idx_0 3074 if (nr == 0) then 3075 nnz_imp(idom) = 0 3076 cycle 3077 endif 3078 allocate(index_imp(0:nr)) 3079 tag = 3001 3080 call HECMW_RECV_INT(index_imp(0:nr), nr+1, irank, tag, & 3081 hecCOMM%HECMW_COMM, statuses(:,1)) 3082 nnz_imp(idom) = index_imp(nr) 3083 do j = 1, nr 3084 jj = hecCOMM%import_item(idx_0 + j) - hecMESH%nn_internal 3085 if (jj < 1 .or. BT_imp%nr < jj) stop 'ERROR: jj out of range' 3086 if (cnt(jj) /= 0) stop 'ERROR: duplicate import rows?' 3087 cnt(jj) = index_imp(j) - index_imp(j-1) 3088 enddo 3089 deallocate(index_imp) 3090 enddo 3091 ! 3092 allocate(imp_cols_index(0:nnb)) 3093 call make_index(nnb, nnz_imp, imp_cols_index) 3094 deallocate(nnz_imp) 3095 ! 3096 allocate(BT_imp%index(0:BT_imp%nr)) 3097 call make_index(BT_imp%nr, cnt, BT_imp%index) 3098 deallocate(cnt) 3099 ! 3100 BT_imp%nnz = BT_imp%index(BT_imp%nr) 3101 if (BT_imp%nnz /= imp_cols_index(nnb)) & 3102 stop 'ERROR: total num of nonzero of BT_imp' 3103 ! 3104 allocate(imp_cols_item(cNCOL_ITEM, BT_imp%nnz)) 3105 allocate(imp_vals_item(ndof2 * BT_imp%nnz)) 3106 ! 3107 do idom = 1, nnb 3108 irank = hecCOMM%neighbor_pe(idom) 3109 idx_0 = imp_cols_index(idom-1) 3110 idx_n = imp_cols_index(idom) 3111 nnz = idx_n - idx_0 3112 if (nnz == 0) cycle 3113 tag = 3002 3114 call HECMW_RECV_INT(imp_cols_item(1, idx_0 + 1), cNCOL_ITEM * nnz, & 3115 irank, tag, hecCOMM%HECMW_COMM, statuses(:,1)) 3116 tag = 3003 3117 call HECMW_RECV_R(imp_vals_item(ndof2*idx_0 + 1), ndof2 * nnz, & 3118 irank, tag, hecCOMM%HECMW_COMM, statuses(:,1)) 3119 enddo 3120 call HECMW_Waitall(n_send, requests, statuses) 3121 if (DEBUG >= 2) write(0,*) ' DEBUG2: send BT_imp and recv into temporary data done' 3122 ! 3123 deallocate(requests) 3124 deallocate(statuses) 3125 ! 3126 do idom = 1, nnb 3127 call hecmw_localmat_free(BT_exp(idom)) 3128 enddo 3129 deallocate(BT_exp) 3130 deallocate(exp_cols_index) 3131 deallocate(exp_cols_item) 3132 ! 3133 call copy_mesh(hecMESH, hecMESHnew) 3134 ! 3135 call map_imported_cols(hecMESHnew, imp_cols_index(nnb), imp_cols_item, n_add_node, add_nodes, map, i0) 3136 if (DEBUG >= 2) write(0,*) ' DEBUG2: map imported cols done' 3137 ! 3138 call update_comm_table(hecMESHnew, n_add_node, add_nodes, i0) 3139 if (DEBUG >= 2) write(0,*) ' DEBUG2: update comm_table done' 3140 ! 3141 BT_imp%nc = hecMESHnew%n_node 3142 ! 3143 allocate(BT_imp%item(BT_imp%nnz)) 3144 allocate(BT_imp%A(ndof2 * BT_imp%nnz)) 3145 call copy_vals_to_BT_imp(hecCOMM, hecMESH%nn_internal, imp_cols_index, map, imp_vals_item, BT_imp) 3146 if (DEBUG >= 2) write(0,*) ' DEBUG2: copy vals to BT_imp done' 3147 ! 3148 deallocate(imp_cols_index) 3149 deallocate(imp_cols_item) 3150 deallocate(imp_vals_item) 3151 deallocate(map) 3152 end subroutine send_BT_exp_and_recv_BT_imp 3153 3154 subroutine copy_vals_to_BT_imp(hecCOMM, nn_internal, imp_cols_index, map, imp_vals_item, BT_imp) 3155 implicit none 3156 type (hecmwST_matrix_comm), intent(in) :: hecCOMM 3157 integer(kind=kint), intent(in) :: nn_internal 3158 integer(kind=kint), allocatable, intent(in) :: imp_cols_index(:) 3159 integer(kind=kint), intent(in) :: map(:) 3160 real(kind=kreal), intent(in) :: imp_vals_item(:) 3161 type (hecmwST_local_matrix), intent(inout) :: BT_imp 3162 integer(kind=kint) :: nnb, ndof2, idx, idom, idx_0, idx_n, nr, j, jrow, ks, ke, k 3163 nnb = hecCOMM%n_neighbor_pe 3164 ndof2 = BT_imp%ndof ** 2 3165 idx = 0 3166 do idom = 1, nnb 3167 idx_0 = hecCOMM%import_index(idom-1) 3168 idx_n = hecCOMM%import_index(idom) 3169 nr = idx_n - idx_0 3170 if (nr == 0) cycle 3171 do j = 1, nr 3172 jrow = hecCOMM%import_item(idx_0 + j) - nn_internal 3173 ks = BT_imp%index(jrow-1)+1 3174 ke = BT_imp%index(jrow) 3175 do k = ks, ke 3176 idx = idx + 1 3177 BT_imp%item(k) = map(idx) 3178 BT_imp%A(ndof2*(k-1)+1:ndof2*k) = imp_vals_item(ndof2*(idx-1)+1:ndof2*idx) 3179 enddo 3180 enddo 3181 if (idx /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_imp' 3182 enddo 3183 end subroutine copy_vals_to_BT_imp 3184 3185 subroutine concat_BTmat_and_BT_imp(BTmat, BT_imp, BT_all) 3186 implicit none 3187 type (hecmwST_local_matrix), intent(in) :: BTmat 3188 type (hecmwST_local_matrix), intent(in) :: BT_imp 3189 type (hecmwST_local_matrix), intent(out) :: BT_all 3190 integer(kind=kint) :: ndof, ndof2, i, ii 3191 ndof = BTmat%ndof 3192 if (BT_imp%nr > 0 .and. BT_imp%ndof /= ndof) stop 'ERROR: concat BTmat and BT_imp: ndof' 3193 ndof2 = ndof*ndof 3194 BT_all%nr = BTmat%nr + BT_imp%nr 3195 BT_all%nc = max(BTmat%nc, BT_imp%nc) 3196 BT_all%nnz = BTmat%nnz + BT_imp%nnz 3197 BT_all%ndof = ndof 3198 allocate(BT_all%index(0:BT_all%nr)) 3199 allocate(BT_all%item(BT_all%nnz)) 3200 allocate(BT_all%A(ndof2 * BT_all%nnz)) 3201 BT_all%index(0) = 0 3202 do i = 1, BTmat%nr 3203 BT_all%index(i) = BTmat%index(i) 3204 enddo 3205 do i = 1, BT_imp%nr 3206 BT_all%index(BTmat%nr+i) = BT_all%index(BTmat%nr+i-1) + & 3207 BT_imp%index(i) - BT_imp%index(i-1) 3208 enddo 3209 do i = 1, BTmat%nnz 3210 BT_all%item(i) = BTmat%item(i) 3211 BT_all%A(ndof2*(i-1)+1:ndof2*i) = BTmat%A(ndof2*(i-1)+1:ndof2*i) 3212 enddo 3213 do i = 1, BT_imp%nnz 3214 ii = BTmat%nnz + i 3215 BT_all%item(ii) = BT_imp%item(i) 3216 BT_all%A(ndof2*(ii-1)+1:ndof2*ii) = BT_imp%A(ndof2*(i-1)+1:ndof2*i) 3217 enddo 3218 end subroutine concat_BTmat_and_BT_imp 3219 3220 subroutine multiply_mat_mat(Amat, Bmat, Cmat) 3221 implicit none 3222 type (hecmwST_local_matrix), intent(in) :: Amat 3223 type (hecmwST_local_matrix), intent(in) :: Bmat 3224 type (hecmwST_local_matrix), intent(out) :: Cmat 3225 integer(kind=kint) :: ndof, ndof2, nr, nc, nnz, i, icnt 3226 integer(kind=kint) :: js, je, j, jj, ks, ke, k, kk, l, ll, l0 3227 integer(kind=kint), allocatable :: iw(:) 3228 real(kind=kreal), pointer :: Ap(:), Bp(:), Cp(:) 3229 real(kind=kreal) :: t0, t1 3230 t0 = hecmw_wtime() 3231 if (Amat%ndof /= Bmat%ndof) stop 'ERROR: multiply_mat_mat: unmatching ndof' 3232 ndof = Amat%ndof 3233 ndof2 = ndof*ndof 3234 nr = Amat%nr 3235 nc = Bmat%nc 3236 if (Amat%nc /= Bmat%nr) then 3237 write(0,*) 'Amat: nr, nc = ', Amat%nr, Amat%nc 3238 write(0,*) 'Bmat: nr, nc = ', Bmat%nr, Bmat%nc 3239 stop 'ERROR: multiply_mat_mat: unmatching size' 3240 endif 3241 Cmat%ndof = ndof 3242 Cmat%nr = nr 3243 Cmat%nc = nc 3244 allocate(Cmat%index(0:nr)) 3245 Cmat%index(0) = 0 3246 !$omp parallel default(none), & 3247 !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,l), & 3248 !$omp& shared(nr,nc,Amat,Bmat,Cmat) 3249 allocate(iw(nc)) 3250 !$omp do 3251 do i = 1, nr 3252 icnt = 0 3253 js = Amat%index(i-1)+1 3254 je = Amat%index(i) 3255 do j = js, je 3256 jj = Amat%item(j) 3257 ks = Bmat%index(jj-1)+1 3258 ke = Bmat%index(jj) 3259 kl1: do k = ks, ke 3260 kk = Bmat%item(k) 3261 do l = 1, icnt 3262 if (iw(l) == kk) cycle kl1 3263 enddo 3264 icnt = icnt + 1 3265 iw(icnt) = kk 3266 enddo kl1 3267 enddo 3268 Cmat%index(i) = icnt 3269 enddo 3270 !$omp end do 3271 deallocate(iw) 3272 !$omp end parallel 3273 do i = 1, nr 3274 Cmat%index(i) = Cmat%index(i-1) + Cmat%index(i) 3275 enddo 3276 nnz = Cmat%index(nr) 3277 Cmat%nnz = nnz 3278 !write(0,*) 'nnz',nnz 3279 t1 = hecmw_wtime() 3280 if (TIMER >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (1) : ",t1-t0 3281 t0 = hecmw_wtime() 3282 allocate(Cmat%item(nnz)) 3283 allocate(Cmat%A(ndof2 * nnz)) 3284 Cmat%A(:) = 0.0d0 3285 !$omp parallel default(none), & 3286 !$omp& private(i,icnt,l0,js,je,j,jj,Ap,ks,ke,k,kk,Bp,ll,l,Cp), & 3287 !$omp& shared(nr,Cmat,Amat,Bmat,ndof2,ndof) 3288 !$omp do 3289 do i = 1, nr 3290 icnt = 0 3291 l0 = Cmat%index(i-1) 3292 ! item 3293 js = Amat%index(i-1)+1 3294 je = Amat%index(i) 3295 do j = js, je 3296 jj = Amat%item(j) 3297 Ap => Amat%A(ndof2*(j-1)+1:ndof2*j) 3298 ks = Bmat%index(jj-1)+1 3299 ke = Bmat%index(jj) 3300 do k = ks, ke 3301 kk = Bmat%item(k) 3302 Bp => Bmat%A(ndof2*(k-1)+1:ndof2*k) 3303 ll = -1 3304 do l = 1, icnt 3305 if (Cmat%item(l0+l) == kk) then 3306 ll = l0 + l 3307 exit 3308 endif 3309 enddo 3310 if (ll < 0) then 3311 icnt = icnt + 1 3312 ll = l0 + icnt 3313 Cmat%item(ll) = kk 3314 endif 3315 Cp => Cmat%A(ndof2*(ll-1)+1:ndof2*ll) 3316 call blk_matmul_add(ndof, Ap, Bp, Cp) 3317 enddo 3318 enddo 3319 !write(0,*) 'l0,icnt,index(i)',Cmat%index(i-1),icnt,Cmat%index(i) 3320 if (l0+icnt /= Cmat%index(i)) stop 'ERROR: multiply_mat_mat: unknown error' 3321 enddo 3322 !$omp end do 3323 !$omp end parallel 3324 t1 = hecmw_wtime() 3325 if (TIMER >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (2) : ",t1-t0 3326 t0 = hecmw_wtime() 3327 call sort_and_uniq_rows(Cmat) 3328 t1 = hecmw_wtime() 3329 if (TIMER >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (3) : ",t1-t0 3330 end subroutine multiply_mat_mat 3331 3332 subroutine blk_matmul_add(ndof, A, B, AB) 3333 implicit none 3334 integer, intent(in) :: ndof 3335 real(kind=kreal), intent(in) :: A(:), B(:) 3336 real(kind=kreal), intent(inout) :: AB(:) 3337 integer :: ndof2, i, j, k, i0, j0, ij, ik, jk 3338 ndof2=ndof*ndof 3339 do i=1,ndof 3340 i0=(i-1)*ndof 3341 do j=1,ndof 3342 ij=i0+j 3343 j0=(j-1)*ndof 3344 do k=1,ndof 3345 ik=i0+k 3346 jk=j0+k 3347 !$omp atomic 3348 AB(ik)=AB(ik)+A(ij)*B(jk) 3349 enddo 3350 enddo 3351 enddo 3352 end subroutine blk_matmul_add 3353 3354 subroutine hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT) 3355 implicit none 3356 type (hecmwST_matrix), intent(in) :: hecMAT 3357 type (hecmwST_local_matrix), intent(in) :: BTtKTmat 3358 type (hecmwST_matrix), intent(inout) :: hecTKT 3359 call make_new_hecmat(hecMAT, BTtKTmat, hecTKT) 3360 end subroutine hecmw_localmat_make_hecmat 3361 3362 subroutine hecmw_localmat_shrink_comm_table(BKmat, hecMESH) 3363 implicit none 3364 type (hecmwST_local_matrix), intent(in) :: BKmat 3365 type (hecmwST_local_mesh), intent(inout) :: hecMESH 3366 type (hecmwST_matrix_comm) :: hecCOMM 3367 call make_comm_table(BKmat, hecMESH, hecCOMM) 3368 deallocate(hecMESH%import_index) 3369 deallocate(hecMESH%import_item) 3370 deallocate(hecMESH%export_index) 3371 deallocate(hecMESH%export_item) 3372 hecMESH%import_index => hecCOMM%import_index 3373 hecMESH%import_item => hecCOMM%import_item 3374 hecMESH%export_index => hecCOMM%export_index 3375 hecMESH%export_item => hecCOMM%export_item 3376 deallocate(hecCOMM%neighbor_pe) 3377 end subroutine hecmw_localmat_shrink_comm_table 3378 3379end module hecmw_local_matrix 3380